aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
commit7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
treecc41c05b1db454e3d802f34df75e636ee922ad87 /test
parentd288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
downloadsrc-7ab83427af0f77b59941ceba41d509d7d097b065.tar.gz
src-7ab83427af0f77b59941ceba41d509d7d097b065.zip
Vendor import of llvm trunk r305145:vendor/llvm/llvm-trunk-r305145
Notes
Notes: svn path=/vendor/llvm/dist/; revision=319780 svn path=/vendor/llvm/llvm-trunk-r305145/; revision=319781; tag=vendor/llvm/llvm-trunk-r305145
Diffstat (limited to 'test')
-rw-r--r--test/Analysis/BranchProbabilityInfo/libfunc_call.ll264
-rw-r--r--test/Analysis/ConstantFolding/gep-constanfolding-error.ll2
-rw-r--r--test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll68
-rw-r--r--test/Bindings/OCaml/core.ml11
-rw-r--r--test/Bitcode/ptest-old.ll2
-rw-r--r--test/BugPoint/unsymbolized.ll21
-rw-r--r--test/CMakeLists.txt2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll23
-rw-r--r--test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir158
-rw-r--r--test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll4
-rw-r--r--test/CodeGen/AArch64/GlobalISel/call-translator.ll10
-rw-r--r--test/CodeGen/AArch64/GlobalISel/debug-insts.ll4
-rw-r--r--test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir16
-rw-r--r--test/CodeGen/AArch64/GlobalISel/localizer.mir80
-rw-r--r--test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir2
-rw-r--r--test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir164
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-binop.mir200
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-bitcast.mir32
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir72
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-int-ext.mir46
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir24
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-load.mir100
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-muladd.mir14
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-store.mir84
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-trunc.mir12
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select-xor.mir30
-rw-r--r--test/CodeGen/AArch64/GlobalISel/select.mir52
-rw-r--r--test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll131
-rw-r--r--test/CodeGen/AArch64/spill-undef.mir67
-rw-r--r--test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir24
-rw-r--r--test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir28
-rw-r--r--test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir14
-rw-r--r--test/CodeGen/AMDGPU/add.v2i16.ll8
-rw-r--r--test/CodeGen/AMDGPU/ashr.v2i16.ll2
-rw-r--r--test/CodeGen/AMDGPU/branch-relax-spill.ll420
-rw-r--r--test/CodeGen/AMDGPU/clamp-omod-special-case.mir20
-rw-r--r--test/CodeGen/AMDGPU/exceed-max-sgprs.ll142
-rw-r--r--test/CodeGen/AMDGPU/fabs.f16.ll8
-rw-r--r--test/CodeGen/AMDGPU/fadd.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/fcanonicalize.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/flat-scratch-reg.ll14
-rw-r--r--test/CodeGen/AMDGPU/fmul.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/fneg-fabs.f16.ll8
-rw-r--r--test/CodeGen/AMDGPU/fneg.f16.ll2
-rw-r--r--test/CodeGen/AMDGPU/fptosi.f16.ll2
-rw-r--r--test/CodeGen/AMDGPU/fptoui.f16.ll2
-rw-r--r--test/CodeGen/AMDGPU/fsub.f16.ll4
-rw-r--r--test/CodeGen/AMDGPU/hsa-note-no-func.ll13
-rw-r--r--test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll20
-rw-r--r--test/CodeGen/AMDGPU/immv216.ll38
-rw-r--r--test/CodeGen/AMDGPU/indirect-addressing-si.ll2
-rw-r--r--test/CodeGen/AMDGPU/inline-asm.ll12
-rw-r--r--test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll8
-rw-r--r--test/CodeGen/AMDGPU/limit-coalesce.mir14
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll23
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll22
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll57
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll22
-rw-r--r--test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.maxnum.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/llvm.minnum.f16.ll6
-rw-r--r--test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll10
-rw-r--r--test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll131
-rw-r--r--test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir69
-rw-r--r--test/CodeGen/AMDGPU/scratch-simple.ll6
-rw-r--r--test/CodeGen/AMDGPU/sdwa-peephole.ll24
-rw-r--r--test/CodeGen/AMDGPU/shl.v2i16.ll2
-rw-r--r--test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll88
-rw-r--r--test/CodeGen/AMDGPU/skip-if-dead.ll12
-rw-r--r--test/CodeGen/AMDGPU/sminmax.v2i16.ll8
-rw-r--r--test/CodeGen/AMDGPU/spill-scavenge-offset.ll14
-rw-r--r--test/CodeGen/AMDGPU/sub.v2i16.ll12
-rw-r--r--test/CodeGen/AMDGPU/undefined-subreg-liverange.ll6
-rw-r--r--test/CodeGen/AMDGPU/v_mac_f16.ll31
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir149
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir115
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll90
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-isel.ll84
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir36
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-legalizer.mir242
-rw-r--r--test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir281
-rw-r--r--test/CodeGen/ARM/clang-section.ll140
-rw-r--r--test/CodeGen/ARM/cortex-a57-misched-vfma.ll91
-rw-r--r--test/CodeGen/ARM/invalidated-save-point.ll4
-rw-r--r--test/CodeGen/Generic/llc-start-stop.ll4
-rw-r--r--test/CodeGen/Hexagon/common-gep-inbounds.ll20
-rw-r--r--test/CodeGen/Hexagon/mux-undef.ll27
-rw-r--r--test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir1
-rw-r--r--test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir1
-rw-r--r--test/CodeGen/MIR/AArch64/register-operand-bank.mir4
-rw-r--r--test/CodeGen/MIR/AArch64/stack-object-local-offset.mir4
-rw-r--r--test/CodeGen/MIR/Generic/frame-info.mir5
-rw-r--r--test/CodeGen/MIR/Generic/function-missing-machine-function.mir13
-rw-r--r--test/CodeGen/MIR/X86/callee-saved-info.mir4
-rw-r--r--test/CodeGen/MIR/X86/empty0.mir6
-rw-r--r--test/CodeGen/MIR/X86/empty1.mir8
-rw-r--r--test/CodeGen/MIR/X86/empty2.mir8
-rw-r--r--test/CodeGen/MIR/X86/fixed-stack-objects.mir2
-rw-r--r--test/CodeGen/MIR/X86/generic-instr-type.mir10
-rw-r--r--test/CodeGen/MIR/X86/inline-asm.mir12
-rw-r--r--test/CodeGen/MIR/X86/register-operand-class.mir12
-rw-r--r--test/CodeGen/MIR/X86/roundtrip.mir20
-rw-r--r--test/CodeGen/MIR/X86/simple-register-allocation-hints.mir2
-rw-r--r--test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir2
-rw-r--r--test/CodeGen/MIR/X86/stack-object-debug-info.mir5
-rw-r--r--test/CodeGen/MIR/X86/stack-objects.mir9
-rw-r--r--test/CodeGen/MIR/X86/variable-sized-stack-objects.mir8
-rw-r--r--test/CodeGen/MIR/X86/virtual-registers.mir12
-rw-r--r--test/CodeGen/Mips/biggot.ll3
-rw-r--r--test/CodeGen/Mips/cconv/vector.ll1657
-rw-r--r--test/CodeGen/Mips/ctlz-v.ll12
-rw-r--r--test/CodeGen/Mips/cttz-v.ll19
-rw-r--r--test/CodeGen/Mips/dsp-r1.ll12
-rw-r--r--test/CodeGen/Mips/fmadd1.ll455
-rw-r--r--test/CodeGen/Mips/llvm-ir/mul.ll2
-rw-r--r--test/CodeGen/Mips/llvm-ir/sdiv.ll12
-rw-r--r--test/CodeGen/Mips/llvm-ir/srem.ll11
-rw-r--r--test/CodeGen/Mips/llvm-ir/udiv.ll11
-rw-r--r--test/CodeGen/Mips/llvm-ir/urem.ll6
-rw-r--r--test/CodeGen/Mips/micromips-gp-rc.ll2
-rw-r--r--test/CodeGen/Mips/mips64fpldst.ll12
-rw-r--r--test/CodeGen/Mips/pbqp-reserved-physreg.ll35
-rw-r--r--test/CodeGen/Mips/return-vector.ll33
-rw-r--r--test/CodeGen/Mips/tailcall/tailcall.ll4
-rw-r--r--test/CodeGen/PowerPC/BoolRetToIntTest-2.ll19
-rw-r--r--test/CodeGen/PowerPC/BoolRetToIntTest.ll28
-rw-r--r--test/CodeGen/PowerPC/crbits.ll46
-rw-r--r--test/CodeGen/PowerPC/logic-ops-on-compares.ll73
-rw-r--r--test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll277
-rw-r--r--test/CodeGen/PowerPC/ppc-crbits-onoff.ll18
-rw-r--r--test/CodeGen/PowerPC/setcc-logic.ll16
-rw-r--r--test/CodeGen/PowerPC/testComparesinesc.ll121
-rw-r--r--test/CodeGen/PowerPC/testComparesinesi.ll121
-rw-r--r--test/CodeGen/PowerPC/testComparesinesll.ll125
-rw-r--r--test/CodeGen/PowerPC/testComparesiness.ll121
-rw-r--r--test/CodeGen/PowerPC/testComparesineuc.ll136
-rw-r--r--test/CodeGen/PowerPC/testComparesineui.ll121
-rw-r--r--test/CodeGen/PowerPC/testComparesineull.ll125
-rw-r--r--test/CodeGen/PowerPC/testComparesineus.ll137
-rw-r--r--test/CodeGen/PowerPC/testComparesllnesll.ll125
-rw-r--r--test/CodeGen/PowerPC/testComparesllneull.ll125
-rw-r--r--test/CodeGen/PowerPC/vec_int_ext.ll90
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-call.ll30
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll122
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll25
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-X32.mir10
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir170
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v128.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v256.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v512.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-x32.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-cmp.mir130
-rw-r--r--test/CodeGen/X86/GlobalISel/select-constant.mir12
-rw-r--r--test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir16
-rw-r--r--test/CodeGen/X86/GlobalISel/select-ext.mir22
-rw-r--r--test/CodeGen/X86/GlobalISel/select-gep.mir6
-rw-r--r--test/CodeGen/X86/GlobalISel/select-inc.mir8
-rw-r--r--test/CodeGen/X86/GlobalISel/select-leaf-constant.mir8
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir56
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-scalar.mir76
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v128.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v256.mir32
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v512.mir16
-rw-r--r--test/CodeGen/X86/GlobalISel/select-mul-scalar.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/select-mul-vec.mir90
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v128.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v256.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v512.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub.mir60
-rw-r--r--test/CodeGen/X86/GlobalISel/select-trunc.mir24
-rw-r--r--test/CodeGen/X86/O0-pipeline.ll2
-rw-r--r--test/CodeGen/X86/atom-fixup-lea3.ll11
-rw-r--r--test/CodeGen/X86/avx-schedule.ll32
-rw-r--r--test/CodeGen/X86/avx-splat.ll36
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll4
-rw-r--r--test/CodeGen/X86/build-vector-128.ll92
-rw-r--r--test/CodeGen/X86/buildvec-insertvec.ll72
-rw-r--r--test/CodeGen/X86/clear_upper_vector_element_bits.ll195
-rw-r--r--test/CodeGen/X86/fast-isel-nontemporal.ll108
-rw-r--r--test/CodeGen/X86/full-lsr.ll10
-rw-r--r--test/CodeGen/X86/haddsub-2.ll348
-rw-r--r--test/CodeGen/X86/haddsub-undef.ll5
-rw-r--r--test/CodeGen/X86/hoist-spill.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll15
-rw-r--r--test/CodeGen/X86/madd.ll78
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll16
-rw-r--r--test/CodeGen/X86/memcmp.ll330
-rw-r--r--test/CodeGen/X86/merge-consecutive-loads-128.ll36
-rw-r--r--test/CodeGen/X86/mul-constant-i16.ll139
-rw-r--r--test/CodeGen/X86/mul-constant-i32.ll1578
-rw-r--r--test/CodeGen/X86/mul-constant-i64.ll1605
-rw-r--r--test/CodeGen/X86/mul-constant-result.ll1291
-rw-r--r--test/CodeGen/X86/nontemporal-loads.ll779
-rw-r--r--test/CodeGen/X86/pr32659.ll83
-rw-r--r--test/CodeGen/X86/select.ll14
-rw-r--r--test/CodeGen/X86/selectiondag-dominator.ll30
-rw-r--r--test/CodeGen/X86/sse-intrinsics-fast-isel.ll74
-rw-r--r--test/CodeGen/X86/sse1.ll80
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-fast-isel.ll159
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub-2.ll14
-rw-r--r--test/CodeGen/X86/sse42-intrinsics-fast-isel.ll6
-rw-r--r--test/CodeGen/X86/stack-folding-fp-avx1.ll21
-rw-r--r--test/CodeGen/X86/stack-folding-int-sse42.ll17
-rw-r--r--test/CodeGen/X86/trunc-to-bool.ll70
-rw-r--r--test/CodeGen/X86/vec_fp_to_int.ll18
-rw-r--r--test/CodeGen/X86/vec_int_to_fp.ll286
-rw-r--r--test/CodeGen/X86/vec_set.ll24
-rw-r--r--test/CodeGen/X86/vector-compare-results.ll538
-rw-r--r--test/CodeGen/X86/vector-rem.ll34
-rw-r--r--test/CodeGen/X86/vector-sext.ll784
-rw-r--r--test/CodeGen/X86/vector-shuffle-v48.ll49
-rw-r--r--test/CodeGen/X86/vector-shuffle-variable-128.ll274
-rw-r--r--test/CodeGen/X86/vector-sqrt.ll18
-rw-r--r--test/CodeGen/X86/vector-unsigned-cmp.ll134
-rw-r--r--test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll4
-rw-r--r--test/CodeGen/X86/vshift-1.ll9
-rw-r--r--test/CodeGen/X86/vshift-2.ll9
-rw-r--r--test/CodeGen/X86/x86-interleaved-access.ll93
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s277
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s34
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.obin0 -> 824 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s36
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.obin0 -> 832 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s88
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.obin0 -> 2296 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s50
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.obin0 -> 1264 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s10
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.obin0 -> 464 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets.s500
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.obin0 -> 4000 bytes
-rw-r--r--test/DebugInfo/PDB/DIA/pdbdump-flags.test8
-rw-r--r--test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test4
-rw-r--r--test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test10
-rw-r--r--test/DebugInfo/PDB/Inputs/debug-subsections.yaml91
-rw-r--r--test/DebugInfo/PDB/Inputs/simple-line-info.yaml44
-rw-r--r--test/DebugInfo/PDB/Native/pdb-native-compilands.test4
-rw-r--r--test/DebugInfo/PDB/Native/pdb-native-summary.test2
-rw-r--r--test/DebugInfo/PDB/pdb-longname-truncation.test2
-rw-r--r--test/DebugInfo/PDB/pdb-minimal-construct.test22
-rw-r--r--test/DebugInfo/PDB/pdb-yaml-symbols.test2
-rw-r--r--test/DebugInfo/PDB/pdb-yaml-types.test6
-rw-r--r--test/DebugInfo/PDB/pdbdump-debug-subsections.test210
-rw-r--r--test/DebugInfo/PDB/pdbdump-headers.test70
-rw-r--r--test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test18
-rw-r--r--test/DebugInfo/PDB/pdbdump-mergeids.test12
-rw-r--r--test/DebugInfo/PDB/pdbdump-mergetypes.test10
-rw-r--r--test/DebugInfo/PDB/pdbdump-raw-blocks.test10
-rw-r--r--test/DebugInfo/PDB/pdbdump-raw-stream.test4
-rw-r--r--test/DebugInfo/PDB/pdbdump-readwrite.test10
-rw-r--r--test/DebugInfo/PDB/pdbdump-source-names.test8
-rw-r--r--test/DebugInfo/PDB/pdbdump-write.test14
-rw-r--r--test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test71
-rw-r--r--test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test60
-rw-r--r--test/DebugInfo/PDB/pdbdump-yaml-types.test2
-rw-r--r--test/DebugInfo/PDB/pdbdump-yaml.test4
-rw-r--r--test/DebugInfo/dwarfdump-str-offsets-invalid.test24
-rw-r--r--test/DebugInfo/dwarfdump-str-offsets.test76
-rw-r--r--test/FileCheck/check-dag.txt9
-rw-r--r--test/Instrumentation/MemorySanitizer/csr.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_arith.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_cmp.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_cvt.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_pack.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_shift.ll2
-rw-r--r--test/LTO/ARM/Inputs/thumb.ll15
-rw-r--r--test/LTO/ARM/link-arm-and-thumb.ll32
-rw-r--r--test/LTO/Resolution/X86/linker-redef.ll16
-rw-r--r--test/Linker/Inputs/thumb.ll16
-rw-r--r--test/Linker/link-arm-and-thumb.ll23
-rw-r--r--test/MC/AMDGPU/sopp-err.s8
-rw-r--r--test/MC/AMDGPU/sym_option.s4
-rw-r--r--test/MC/ARM/arm-thumb-tail-call.ll25
-rw-r--r--test/MC/ARM/big-endian-thumb2-fixup.s6
-rw-r--r--test/MC/ARM/t2-modified-immediate-fixup-error1.s13
-rw-r--r--test/MC/ARM/t2-modified-immediate-fixup-error2.s12
-rw-r--r--test/MC/ARM/t2-modified-immediate-fixup.s45
-rw-r--r--test/MC/ARM/thumb2-diagnostics.s2
-rw-r--r--test/MC/AsmParser/empty-comment.s4
-rw-r--r--test/MC/Disassembler/Mips/micromips-dsp/valid.txt2
-rw-r--r--test/MC/ELF/ARM/clang-section.s399
-rw-r--r--test/MC/MachO/alias.s12
-rw-r--r--test/MC/MachO/variable-exprs.s8
-rw-r--r--test/MC/Mips/dsp/invalid.s4
-rw-r--r--test/MC/Mips/micromips-dsp/invalid.s2
-rw-r--r--test/MC/Mips/micromips-dsp/valid.s2
-rw-r--r--test/MC/WebAssembly/reloc-code.ll20
-rw-r--r--test/Object/AMDGPU/elf-definitions.yaml21
-rw-r--r--test/Object/objc-imageinfo-coff.ll15
-rw-r--r--test/Object/objc-imageinfo-elf.ll15
-rw-r--r--test/Object/objc-imageinfo-macho.ll15
-rw-r--r--test/Transforms/CodeGenPrepare/X86/memcmp.ll337
-rw-r--r--test/Transforms/ConstProp/sse.ll2
-rw-r--r--test/Transforms/DCE/calls-errno.ll4
-rw-r--r--test/Transforms/GVNSink/sink-common-code.ll57
-rw-r--r--test/Transforms/IRCE/correct-loop-info.ll182
-rw-r--r--test/Transforms/IndVarSimplify/lftr_disabled.ll28
-rw-r--r--test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll36
-rw-r--r--test/Transforms/Inline/basictest.ll24
-rw-r--r--test/Transforms/InstCombine/constant-fold-libfunc.ll20
-rw-r--r--test/Transforms/InstCombine/insert-extract-shuffle.ll23
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll154
-rw-r--r--test/Transforms/InstCombine/lshr.ll43
-rw-r--r--test/Transforms/InstSimplify/call.ll33
-rw-r--r--test/Transforms/InstSimplify/compare.ll2
-rw-r--r--test/Transforms/InstSimplify/simplify-nested-bitcast.ll54
-rw-r--r--test/Transforms/InstSimplify/vector_gep.ll2
-rw-r--r--test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll61
-rw-r--r--test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll452
-rw-r--r--test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll28
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/canonical.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll14
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/nested-loop.ll22
-rw-r--r--test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll46
-rw-r--r--test/Transforms/LowerExpectIntrinsic/PR33346.ll22
-rw-r--r--test/Transforms/LowerTypeTests/simple.ll16
-rw-r--r--test/Transforms/LowerTypeTests/simplify.ll37
-rw-r--r--test/Transforms/NewGVN/completeness.ll2
-rw-r--r--test/Transforms/NewGVN/loadforward.ll4
-rw-r--r--test/Transforms/NewGVN/pr32403.ll3
-rw-r--r--test/Transforms/NewGVN/pr32897.ll1
-rw-r--r--test/Transforms/NewGVN/pr33187.ll148
-rw-r--r--test/Transforms/SLPVectorizer/X86/arith-fp.ll48
-rw-r--r--test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll138
-rw-r--r--test/Transforms/SROA/address-spaces.ll28
-rw-r--r--test/Transforms/SampleProfile/Inputs/indirect-call.prof3
-rw-r--r--test/Transforms/SampleProfile/indirect-call.ll13
-rw-r--r--test/Transforms/Sink/badloadsink.ll18
-rw-r--r--test/Transforms/ThinLTOBitcodeWriter/split.ll4
-rw-r--r--test/Transforms/Util/PredicateInfo/condprop2.ll2
-rw-r--r--test/Transforms/Util/PredicateInfo/testandor2.ll2
-rw-r--r--test/lit.cfg3
-rw-r--r--test/lit.site.cfg.in1
-rw-r--r--test/tools/llvm-cvtres/Inputs/test_resource.obj.coffbin0 -> 3468 bytes
-rw-r--r--test/tools/llvm-cvtres/object.test229
-rw-r--r--test/tools/llvm-cvtres/parse.test (renamed from test/tools/llvm-cvtres/resource.test)2
-rw-r--r--test/tools/llvm-dwarfdump/X86/brief.s131
-rw-r--r--test/tools/llvm-dwarfdump/X86/lit.local.cfg2
-rw-r--r--test/tools/llvm-pdbdump/class-layout.test2
-rw-r--r--test/tools/llvm-pdbdump/complex-padding-graphical.test2
-rw-r--r--test/tools/llvm-pdbdump/enum-layout.test2
-rw-r--r--test/tools/llvm-pdbdump/load-address.test4
-rw-r--r--test/tools/llvm-pdbdump/raw-stream-data.test6
-rw-r--r--test/tools/llvm-pdbdump/regex-filter.test20
-rw-r--r--test/tools/llvm-pdbdump/simple-padding-graphical.test2
-rw-r--r--test/tools/llvm-pdbdump/symbol-filters.test16
-rwxr-xr-xtest/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveribin13208 -> 0 bytes
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803bin0 -> 2208 bytes
-rw-r--r--test/tools/llvm-readobj/amdgpu-elf-definitions.test11
-rw-r--r--test/tools/llvm-readobj/amdgpu-elf-defs.test28
-rw-r--r--test/tools/llvm-readobj/elf-sec-flags.test29
365 files changed, 19389 insertions, 5517 deletions
diff --git a/test/Analysis/BranchProbabilityInfo/libfunc_call.ll b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll
new file mode 100644
index 000000000000..13bc0de90a61
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll
@@ -0,0 +1,264 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s
+; RUN: opt < %s -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
+
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i32)
+declare i32 @strcasecmp(i8*, i8*)
+declare i32 @strncasecmp(i8*, i8*, i32)
+declare i32 @memcmp(i8*, i8*)
+declare i32 @nonstrcmp(i8*, i8*)
+
+
+; Check that the result of strcmp is considered more likely to be nonzero than
+; zero, and equally likely to be (nonzero) positive or negative.
+
+define i32 @test_strcmp_eq(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq'
+entry:
+ %val = call i32 @strcmp(i8* %p, i8* %q)
+ %cond = icmp eq i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_strcmp_ne(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne'
+entry:
+ %val = call i32 @strcmp(i8* %p, i8* %q)
+ %cond = icmp ne i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_strcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt'
+entry:
+ %val = call i32 @strcmp(i8* %p, i8* %q)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_strcmp_slt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt'
+entry:
+ %val = call i32 @strcmp(i8* %p, i8* %q)
+ %cond = icmp slt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+
+; Similarly check other library functions that have the same behaviour
+
+define i32 @test_strncmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt'
+entry:
+ %val = call i32 @strncmp(i8* %p, i8* %q, i32 4)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt'
+entry:
+ %val = call i32 @strcasecmp(i8* %p, i8* %q)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt'
+entry:
+ %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_memcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt'
+entry:
+ %val = call i32 @memcmp(i8* %p, i8* %q)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+
+; Check that for the result of a call to a non-library function the default
+; heuristic is applied, i.e. positive more likely than negative, nonzero more
+; likely than zero.
+
+define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq'
+entry:
+ %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+ %cond = icmp eq i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne'
+entry:
+ %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+ %cond = icmp ne i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
+
+define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt'
+entry:
+ %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+ %cond = icmp sgt i32 %val, 0
+ br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+ %result = phi i32 [ 0, %then ], [ 1, %else ]
+ ret i32 %result
+}
diff --git a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
index 50ad61a8f100..16bc8a983e48 100644
--- a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
+++ b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
@@ -44,7 +44,7 @@ entry:
%9 = add i32 %f.promoted, %smax
%10 = add i32 %9, 2
call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false)
-; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i64 5, i64 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false)
; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1)
ret i32 0
}
diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
index 00ab21e46d5d..3a0ab0f03b99 100644
--- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
+++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
@@ -10,17 +10,23 @@
define i8 @test1(i32 %a, i32 %length) {
; CHECK-LABEL: LVI for function 'test1':
entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined
br label %loop
+
; CHECK-LABEL: backedge:
-; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]'
-; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, 400>'
-; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
-; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1'
-; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, 401>'
-; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
+; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400>
+; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400>
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401>
+; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401>
+; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
+; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1>
; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400
-; CHECK-NEXT: br i1 %cont, label %backedge, label %exit
-
; CHECK-NOT: loop
loop:
%iv = phi i32 [0, %entry], [%iv.next, %backedge]
@@ -36,46 +42,58 @@ exit:
ret i8 0
}
-
; Here JT does not transform the code, but LVICache is populated during the processing of blocks.
define i8 @test2(i32 %n) {
; CHECK-LABEL: LVI for function 'test2':
; CHECK-LABEL: entry:
-; CHECK-LABEL: ; OverDefined values for block are:
-; CHECK-NEXT: ;i32 %n
+; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined
; CHECK-NEXT: br label %loop
entry:
br label %loop
; CHECK-LABEL: loop:
-; CHECK-LABEL: ; OverDefined values for block are:
-; CHECK-NEXT: ; %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]
-; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]'
-; CHECK-DAG: ; at beginning of BasicBlock: '%loop' LatticeVal: 'constantrange<0, -2147483647>'
-; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, -2147483648>'
-; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
-; CHECK: %cnd = and i1 %cnd1, %cnd2
-; CHECK: br i1 %cnd, label %backedge, label %exit
+; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, -2147483647>
+; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, -2147483648>
+; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483647>
+; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
loop:
%iv = phi i32 [0, %entry], [%iv.next, %backedge]
+; CHECK-NEXT: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%loop' is: overdefined
+; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%backedge' is: constantrange<1, -2147483648>
+; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%exit' is: overdefined
+; CHECK-NEXT: %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]
%iv2 = phi i32 [%n, %entry], [%iv2.next, %backedge]
+
+; CHECK-NEXT: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%loop' is: overdefined
+; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%backedge' is: overdefined
+; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%exit' is: overdefined
+; CHECK-NEXT: %cnd1 = icmp sge i32 %iv, 0
%cnd1 = icmp sge i32 %iv, 0
%cnd2 = icmp sgt i32 %iv2, 0
+; CHECK: %cnd2 = icmp sgt i32 %iv2, 0
+; CHECK: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%loop' is: overdefined
+; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%backedge' is: constantrange<-1, 0>
+; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%exit' is: overdefined
+; CHECK-NEXT: %cnd = and i1 %cnd1, %cnd2
%cnd = and i1 %cnd1, %cnd2
br i1 %cnd, label %backedge, label %exit
; CHECK-LABEL: backedge:
-; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1'
-; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, -2147483647>'
-; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
-; CHECK-NEXT: %iv2.next = sub nsw i32 %iv2, 1
-; CHECK: %cont = and i1 %cont1, %cont2
-; CHECK: br i1 %cont, label %loop, label %exit
+; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined
+; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483647>
+; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
backedge:
%iv.next = add nsw i32 %iv, 1
%iv2.next = sub nsw i32 %iv2, 1
+; CHECK: ; LatticeVal for: ' %cont1 = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined
+; CHECK-NEXT: %cont1 = icmp slt i32 %iv.next, 400
%cont1 = icmp slt i32 %iv.next, 400
+; CHECK-NEXT: ; LatticeVal for: ' %cont2 = icmp sgt i32 %iv2.next, 0' in BB: '%backedge' is: overdefined
+; CHECK-NEXT: %cont2 = icmp sgt i32 %iv2.next, 0
%cont2 = icmp sgt i32 %iv2.next, 0
+; CHECK-NEXT: ; LatticeVal for: ' %cont = and i1 %cont1, %cont2' in BB: '%backedge' is: overdefined
+; CHECK-NEXT: %cont = and i1 %cont1, %cont2
%cont = and i1 %cont1, %cont2
br i1 %cont, label %loop, label %exit
diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml
index 105f1bc4f732..802baa0b80b2 100644
--- a/test/Bindings/OCaml/core.ml
+++ b/test/Bindings/OCaml/core.ml
@@ -66,6 +66,16 @@ let suite name f =
let filename = Sys.argv.(1)
let m = create_module context filename
+(*===-- Contained types --------------------------------------------------===*)
+
+let test_contained_types () =
+ let pointer_i32 = pointer_type i32_type in
+ insist (i32_type = (Array.get (subtypes pointer_i32) 0));
+
+ let ar = struct_type context [| i32_type; i8_type |] in
+ insist (i32_type = (Array.get (subtypes ar)) 0);
+ insist (i8_type = (Array.get (subtypes ar)) 1)
+
(*===-- Conversion --------------------------------------------------------===*)
@@ -1533,6 +1543,7 @@ let test_writer () =
(*===-- Driver ------------------------------------------------------------===*)
let _ =
+ suite "contained types" test_contained_types;
suite "conversion" test_conversion;
suite "target" test_target;
suite "constants" test_constants;
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
index 53ffef900b57..184f72e9856e 100644
--- a/test/Bitcode/ptest-old.ll
+++ b/test/Bitcode/ptest-old.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; RUN: verify-uselistorder < %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
define i32 @foo(<4 x float> %bar) nounwind {
entry:
diff --git a/test/BugPoint/unsymbolized.ll b/test/BugPoint/unsymbolized.ll
new file mode 100644
index 000000000000..8547f220ea26
--- /dev/null
+++ b/test/BugPoint/unsymbolized.ll
@@ -0,0 +1,21 @@
+; REQUIRES: loadable_module
+; RUN: echo "import sys" > %t.py
+; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py
+; RUN: echo "exit(1)" >> %t.py
+; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command="%python" -opt-args %t.py | FileCheck %s
+; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s
+
+; Test that bugpoint disables symbolication on the opt tool to reduce runtime overhead when opt crashes
+; CHECK: args = {{.*}}'-disable-symbolication'
+
+; Test that opt, when it crashes & is passed -disable-symbolication, doesn't symbolicate.
+; In theory this test should maybe be in test/tools/opt or
+; test/Transforms, but since there doesn't seem to be another convenient way to
+; crash opt, apart from the BugpointPasses dynamic plugin, this is the spot for
+; now.
+; CRASH-NOT: Signals.inc
+
+define void @f() {
+ call void @f()
+ ret void
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 25c340fea6f7..b52b6018e026 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -61,7 +61,7 @@ set(LLVM_TEST_DEPENDS
llvm-nm
llvm-objdump
llvm-opt-report
- llvm-pdbdump
+ llvm-pdbutil
llvm-profdata
llvm-ranlib
llvm-readobj
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
index 006308641184..cd3ea9715e0f 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
@@ -4,7 +4,7 @@
; CHECK: name: test_stack_guard
; CHECK: stack:
-; CHECK: - { id: 0, name: StackGuardSlot, offset: 0, size: 8, alignment: 8 }
+; CHECK: - { id: 0, name: StackGuardSlot, type: default, offset: 0, size: 8, alignment: 8,
; CHECK-NOT: id: 1
; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index ac3d4b17f739..65b8ba570701 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -31,10 +31,13 @@ define i64 @muli64(i64 %arg1, i64 %arg2) {
; Tests for alloca
; CHECK-LABEL: name: allocai64
; CHECK: stack:
-; CHECK-NEXT: - { id: 0, name: ptr1, offset: 0, size: 8, alignment: 8 }
-; CHECK-NEXT: - { id: 1, name: ptr2, offset: 0, size: 8, alignment: 1 }
-; CHECK-NEXT: - { id: 2, name: ptr3, offset: 0, size: 128, alignment: 8 }
-; CHECK-NEXT: - { id: 3, name: ptr4, offset: 0, size: 1, alignment: 8 }
+; CHECK-NEXT: - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8,
+; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT: - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1,
+; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT: - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8,
+; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT: - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8,
; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1
; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2
; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3
@@ -1550,3 +1553,15 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2)
define <4 x half> @test_constant_vector() {
ret <4 x half> <half undef, half undef, half undef, half 0xH3C00>
}
+
+define i32 @test_target_mem_intrinsic(i32* %addr) {
+; CHECK-LABEL: name: test_target_mem_intrinsic
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr)
+; CHECK: G_TRUNC [[VAL]](s64)
+ %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr)
+ %trunc = trunc i64 %val to i32
+ ret i32 %trunc
+}
+
+declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
index 0f054f1d940c..296f65c041a1 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
@@ -98,8 +98,8 @@ name: defaultMapping
legalized: true
# CHECK-LABEL: name: defaultMapping
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -119,8 +119,8 @@ name: defaultMappingVector
legalized: true
# CHECK-LABEL: name: defaultMappingVector
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -141,10 +141,10 @@ name: defaultMapping1Repair
legalized: true
# CHECK-LABEL: name: defaultMapping1Repair
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -166,10 +166,10 @@ name: defaultMapping2Repairs
legalized: true
# CHECK-LABEL: name: defaultMapping2Repairs
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -193,9 +193,9 @@ name: defaultMappingDefRepair
legalized: true
# CHECK-LABEL: name: defaultMappingDefRepair
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: fpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: fpr }
@@ -215,11 +215,11 @@ name: phiPropagation
legalized: true
tracksRegLiveness: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr64sp }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr32 }
- { id: 1, class: gpr64sp }
@@ -254,10 +254,10 @@ name: defaultMappingUseRepairPhysReg
legalized: true
# CHECK-LABEL: name: defaultMappingUseRepairPhysReg
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: fpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -280,8 +280,8 @@ name: defaultMappingDefRepairPhysReg
legalized: true
# CHECK-LABEL: name: defaultMappingDefRepairPhysReg
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -303,18 +303,18 @@ name: greedyMappingOr
legalized: true
# CHECK-LABEL: name: greedyMappingOr
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
# Fast mode maps vector instruction on FPR.
-# FAST-NEXT: - { id: 2, class: fpr }
+# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' }
# Fast mode needs two extra copies.
-# FAST-NEXT: - { id: 3, class: fpr }
-# FAST-NEXT: - { id: 4, class: fpr }
+# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' }
# Greedy mode coalesce the computation on the GPR register
# because it is the cheapest.
-# GREEDY-NEXT: - { id: 2, class: gpr }
+# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -350,18 +350,18 @@ name: greedyMappingOrWithConstraints
legalized: true
# CHECK-LABEL: name: greedyMappingOrWithConstraints
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: fpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
# Fast mode maps vector instruction on FPR.
# Fast mode needs two extra copies.
-# FAST-NEXT: - { id: 3, class: fpr }
-# FAST-NEXT: - { id: 4, class: fpr }
+# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' }
# Greedy mode coalesce the computation on the GPR register because it
# is the cheapest, but will need one extra copy to materialize %2 into a FPR.
-# GREEDY-NEXT: - { id: 3, class: gpr }
+# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -396,8 +396,8 @@ body: |
name: ignoreTargetSpecificInst
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr64 }
- { id: 1, class: gpr64 }
@@ -434,8 +434,8 @@ name: bitcast_s32_gpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -457,8 +457,8 @@ name: bitcast_s32_fpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -480,9 +480,9 @@ name: bitcast_s32_gpr_fpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# FAST-NEXT: - { id: 1, class: fpr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -504,9 +504,9 @@ name: bitcast_s32_fpr_gpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# GREEDY-NEXT: - { id: 1, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -528,8 +528,8 @@ name: bitcast_s64_gpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -551,8 +551,8 @@ name: bitcast_s64_fpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -574,9 +574,9 @@ name: bitcast_s64_gpr_fpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# FAST-NEXT: - { id: 1, class: fpr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -597,9 +597,9 @@ name: bitcast_s64_fpr_gpr
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# GREEDY-NEXT: - { id: 1, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -624,15 +624,15 @@ name: greedyWithChainOfComputation
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# FAST-NEXT: - { id: 2, class: fpr }
-# FAST-NEXT: - { id: 3, class: fpr }
-# FAST-NEXT: - { id: 4, class: fpr }
-# GREEDY-NEXT: - { id: 2, class: gpr }
-# GREEDY-NEXT: - { id: 3, class: gpr }
-# GREEDY-NEXT: - { id: 4, class: gpr }
-# CHECK-NEXT: - { id: 5, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 4, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -674,11 +674,11 @@ name: floatingPointLoad
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: fpr }
-# CHECK-NEXT: - { id: 3, class: fpr }
-# CHECK-NEXT: - { id: 4, class: fpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -716,11 +716,11 @@ name: floatingPointStore
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: fpr }
-# CHECK-NEXT: - { id: 3, class: fpr }
-# CHECK-NEXT: - { id: 4, class: fpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
index 4e6b9cad4c3d..38a90bbfbbd9 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
@@ -3,8 +3,8 @@
; CHECK-LABEL: name: test_stack_slots
; CHECK: fixedStack:
-; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 1
-; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 1, size: 1
+; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1,
+; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1,
; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
index 44705a9c9f65..e923a0b2847f 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -35,7 +35,7 @@ define void @test_simple_arg(i32 %in) {
; CHECK-LABEL: name: test_indirect_call
; CHECK: registers:
; Make sure the register feeding the indirect call is properly constrained.
-; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64 }
+; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' }
; CHECK: %[[FUNC]](p0) = COPY %x0
; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp
; CHECK: RET_ReallyLR
@@ -165,9 +165,9 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) {
; CHECK-LABEL: name: test_stack_slots
; CHECK: fixedStack:
-; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 8
-; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 8, size: 8
-; CHECK-DAG: - { id: [[STACK16:[0-9]+]], offset: 16, size: 8
+; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8,
+; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8,
+; CHECK-DAG: - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8,
; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
@@ -208,7 +208,7 @@ define void @test_call_stack() {
; CHECK-LABEL: name: test_mem_i1
; CHECK: fixedStack:
-; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]]
; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0)
define void @test_mem_i1([8 x i64], i1 %in) {
diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
index e01bd2a9f7c8..e832ba953241 100644
--- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
+++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
@@ -3,8 +3,8 @@
; CHECK-LABEL: name: debug_declare
; CHECK: stack:
-; CHECK: - { id: {{.*}}, name: in.addr, offset: {{.*}}, size: {{.*}}, alignment: {{.*}}, di-variable: '!11',
-; CHECK-NEXT: di-expression: '!12', di-location: '!13' }
+; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}},
+; CHECK-NEXT: callee-saved-register: '', di-variable: '!11', di-expression: '!12',
; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !12, debug-location !13
define void @debug_declare(i32 %in) #0 !dbg !7 {
entry:
diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
index ea8a77ca3917..28c926b5d062 100644
--- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
+++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
@@ -35,15 +35,15 @@ regBankSelected: true
tracksRegLiveness: true
registers:
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: fpr }
-# CHECK-NEXT: - { id: 3, class: fpr }
-# CHECK-NEXT: - { id: 4, class: fpr }
-# CHECK-NEXT: - { id: 5, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' }
# The localizer will create two new values to materialize the constants.
-# OPTNONE-NEXT: - { id: 6, class: fpr }
-# OPTNONE-NEXT: - { id: 7, class: fpr }
+# OPTNONE-NEXT: - { id: 6, class: fpr, preferred-register: '' }
+# OPTNONE-NEXT: - { id: 7, class: fpr, preferred-register: '' }
- { id: 0, class: fpr }
- { id: 1, class: gpr }
- { id: 2, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir
index 5bf8dac79860..afe2c13f025d 100644
--- a/test/CodeGen/AArch64/GlobalISel/localizer.mir
+++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir
@@ -44,11 +44,11 @@ regBankSelected: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: gpr }
-#CHECK-NEXT: - { id: 1, class: gpr }
-#CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 3, class: gpr }
+#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -82,11 +82,11 @@ regBankSelected: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: gpr }
-#CHECK-NEXT: - { id: 1, class: gpr }
-#CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 3, class: gpr }
+#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -120,13 +120,13 @@ tracksRegLiveness: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: gpr }
-#CHECK-NEXT: - { id: 1, class: gpr }
-#CHECK-NEXT: - { id: 2, class: gpr }
-#CHECK-NEXT: - { id: 3, class: gpr }
-#CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 5, class: gpr }
+#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -168,14 +168,14 @@ tracksRegLiveness: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: gpr }
-#CHECK-NEXT: - { id: 1, class: gpr }
-#CHECK-NEXT: - { id: 2, class: gpr }
-#CHECK-NEXT: - { id: 3, class: gpr }
-#CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
# The newly created regs should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 5, class: gpr }
-#CHECK-NEXT: - { id: 6, class: gpr }
+#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -219,14 +219,14 @@ tracksRegLiveness: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: gpr }
-#CHECK-NEXT: - { id: 1, class: gpr }
-#CHECK-NEXT: - { id: 2, class: gpr }
-#CHECK-NEXT: - { id: 3, class: gpr }
-#CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 5, class: gpr }
-#CHECK-NEXT: - { id: 6, class: gpr }
+#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -270,14 +270,14 @@ tracksRegLiveness: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: fpr }
-#CHECK-NEXT: - { id: 1, class: fpr }
-#CHECK-NEXT: - { id: 2, class: fpr }
-#CHECK-NEXT: - { id: 3, class: fpr }
-#CHECK-NEXT: - { id: 4, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 5, class: fpr }
-#CHECK-NEXT: - { id: 6, class: fpr }
+#CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 6, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: fpr }
@@ -323,12 +323,12 @@ tracksRegLiveness: true
# CHECK: registers:
# Existing registers should be left untouched
-# CHECK: - { id: 0, class: fpr }
-#CHECK-NEXT: - { id: 1, class: fpr }
-#CHECK-NEXT: - { id: 2, class: fpr }
-#CHECK-NEXT: - { id: 3, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
# The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT: - { id: 4, class: fpr }
+#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
index 73d4d2054729..c8a8266e8b28 100644
--- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
+++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
@@ -32,7 +32,7 @@
name: test_dbg_value
legalized: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
body: |
bb.0:
liveins: %w0
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
index 14ee40c941bf..b8468d8cf55f 100644
--- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
+++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
@@ -73,8 +73,8 @@
name: test_add_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -92,8 +92,8 @@ body: |
name: test_add_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -111,8 +111,8 @@ body: |
name: test_sub_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -130,8 +130,8 @@ body: |
name: test_sub_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -149,8 +149,8 @@ body: |
name: test_mul_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -168,8 +168,8 @@ body: |
name: test_mul_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -187,8 +187,8 @@ body: |
name: test_and_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -206,8 +206,8 @@ body: |
name: test_and_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -225,8 +225,8 @@ body: |
name: test_or_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -244,8 +244,8 @@ body: |
name: test_or_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -263,8 +263,8 @@ body: |
name: test_xor_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -282,8 +282,8 @@ body: |
name: test_xor_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -301,8 +301,8 @@ body: |
name: test_shl_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -320,8 +320,8 @@ body: |
name: test_shl_v4s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -339,8 +339,8 @@ body: |
name: test_lshr_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -358,8 +358,8 @@ body: |
name: test_ashr_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -377,8 +377,8 @@ body: |
name: test_sdiv_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -396,8 +396,8 @@ body: |
name: test_udiv_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -415,8 +415,8 @@ body: |
name: test_anyext_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -434,8 +434,8 @@ body: |
name: test_sext_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -453,8 +453,8 @@ body: |
name: test_zext_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -472,8 +472,8 @@ body: |
name: test_trunc_s32_s64
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -491,7 +491,7 @@ body: |
name: test_constant_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
body: |
@@ -505,7 +505,7 @@ body: |
name: test_constant_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
body: |
@@ -519,8 +519,8 @@ body: |
name: test_icmp_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -538,8 +538,8 @@ body: |
name: test_icmp_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -557,7 +557,7 @@ body: |
name: test_frame_index_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
stack:
@@ -573,8 +573,8 @@ body: |
name: test_ptrtoint_s64_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -592,8 +592,8 @@ body: |
name: test_inttoptr_p0_s64
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -611,8 +611,8 @@ body: |
name: test_load_s32_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -630,8 +630,8 @@ body: |
name: test_store_s32_p0
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -651,8 +651,8 @@ body: |
name: test_fadd_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -670,8 +670,8 @@ body: |
name: test_fsub_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -689,8 +689,8 @@ body: |
name: test_fmul_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -708,8 +708,8 @@ body: |
name: test_fdiv_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -727,8 +727,8 @@ body: |
name: test_fpext_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -746,8 +746,8 @@ body: |
name: test_fptrunc_s32_s64
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -765,7 +765,7 @@ body: |
name: test_fconstant_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
body: |
@@ -779,8 +779,8 @@ body: |
name: test_fcmp_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -798,8 +798,8 @@ body: |
name: test_sitofp_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -817,8 +817,8 @@ body: |
name: test_uitofp_s32_s64
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -836,8 +836,8 @@ body: |
name: test_fptosi_s64_s32
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -855,8 +855,8 @@ body: |
name: test_fptoui_s32_s64
legalized: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir
index 8ae2e1b2eb7d..70cda516d5f1 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir
@@ -64,9 +64,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -94,9 +94,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -123,9 +123,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr32sp }
+# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -151,9 +151,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr64sp }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64sp, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -179,9 +179,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr32sp }
+# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -213,9 +213,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -243,9 +243,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -273,9 +273,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -303,9 +303,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -333,9 +333,9 @@ legalized: true
regBankSelected: true
#
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -365,9 +365,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -395,9 +395,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -425,9 +425,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -455,9 +455,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -485,9 +485,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -515,9 +515,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -545,9 +545,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -575,9 +575,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -606,9 +606,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -636,9 +636,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -666,10 +666,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
-# CHECK-NEXT: - { id: 3, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' }
# CHECK: body:
# CHECK: %0 = COPY %x0
@@ -696,9 +696,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -726,9 +726,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -756,9 +756,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -786,9 +786,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -816,9 +816,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -845,9 +845,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -874,9 +874,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -903,9 +903,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -932,9 +932,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -961,9 +961,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -990,9 +990,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -1019,9 +1019,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
index 5ca63dbc214d..d871a80661a8 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
@@ -19,8 +19,8 @@ name: bitcast_s32_gpr
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all }
-# CHECK-NEXT: - { id: 1, class: gpr32all }
+# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -44,8 +44,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -69,8 +69,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -94,8 +94,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32all }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -119,8 +119,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64all }
-# CHECK-NEXT: - { id: 1, class: gpr64all }
+# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -144,8 +144,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -169,8 +169,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64all }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -193,8 +193,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64all }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
index fbb11a1c7a4c..34c3da3a5369 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
@@ -34,8 +34,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr64 }
-# CHECK: - { id: 1, class: fpr32 }
+# CHECK: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -59,8 +59,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK: - { id: 0, class: fpr32 }
-# CHECK: - { id: 1, class: fpr64 }
+# CHECK: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: fpr }
@@ -84,8 +84,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -109,8 +109,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -134,8 +134,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -159,8 +159,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -184,8 +184,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -209,8 +209,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -234,8 +234,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -259,8 +259,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -284,8 +284,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -309,8 +309,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -334,8 +334,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -359,8 +359,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -384,8 +384,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -409,8 +409,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -434,8 +434,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -459,8 +459,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
index 2ba8b7366252..5f29f8b62fab 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
@@ -24,9 +24,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all }
-# CHECK-NEXT: - { id: 1, class: gpr64all }
-# CHECK-NEXT: - { id: 2, class: gpr64all }
+# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64all, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -51,8 +51,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all }
-# CHECK-NEXT: - { id: 1, class: gpr32all }
+# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -76,9 +76,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -103,8 +103,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -128,8 +128,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -153,8 +153,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -178,9 +178,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -205,8 +205,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -230,8 +230,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -255,8 +255,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
index 6537408f6d98..b71a9a3d731e 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
@@ -18,8 +18,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64all }
-# CHECK-NEXT: - { id: 1, class: gpr64all }
+# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -41,8 +41,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -64,8 +64,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -87,8 +87,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -110,8 +110,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -133,8 +133,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 9188e2b0c0fc..d00b98d148be 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -37,8 +37,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -62,8 +62,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -87,8 +87,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -112,8 +112,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -137,8 +137,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -165,10 +165,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -197,10 +197,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -229,10 +229,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -261,10 +261,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -293,8 +293,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -318,8 +318,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -343,8 +343,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr16 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -368,8 +368,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr8 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -393,10 +393,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -425,10 +425,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -457,10 +457,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: fpr16 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -489,10 +489,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: fpr8 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
index 7d5b43bc16d5..cd7a79f17d95 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
@@ -13,13 +13,13 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
-# CHECK-NEXT: - { id: 5, class: gpr }
-# CHECK-NEXT: - { id: 6, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir
index 9b8f5c566ce0..536e236c2738 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -35,8 +35,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -62,8 +62,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -89,8 +89,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -116,8 +116,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -143,8 +143,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -169,8 +169,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -195,8 +195,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -223,10 +223,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -255,10 +255,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -287,10 +287,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -319,10 +319,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -351,8 +351,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -378,8 +378,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -405,10 +405,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
@@ -437,10 +437,10 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
-# CHECK-NEXT: - { id: 1, class: fpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
index fc3546e777f7..5559e2d3a0d1 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
@@ -15,8 +15,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -39,8 +39,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -63,8 +63,8 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir
index e787849c8d1b..7190fda15b8e 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir
@@ -20,9 +20,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -50,9 +50,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr64 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -81,9 +81,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -110,9 +110,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64 }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -139,9 +139,9 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir
index 8bffa085fdca..5e52bc761a84 100644
--- a/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -35,7 +35,7 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64sp }
+# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' }
registers:
- { id: 0, class: gpr }
@@ -132,12 +132,12 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr64 }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
-# CHECK-NEXT: - { id: 4, class: gpr64 }
-# CHECK-NEXT: - { id: 5, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -180,12 +180,12 @@ legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr64 }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
-# CHECK-NEXT: - { id: 4, class: gpr32 }
-# CHECK-NEXT: - { id: 5, class: gpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -223,9 +223,9 @@ regBankSelected: true
tracksRegLiveness: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: fpr32 }
+# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
registers:
- { id: 0, class: fpr }
- { id: 1, class: gpr }
@@ -260,16 +260,16 @@ regBankSelected: true
tracksRegLiveness: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32 }
-# CHECK-NEXT: - { id: 1, class: gpr32 }
-# CHECK-NEXT: - { id: 2, class: gpr32 }
-# CHECK-NEXT: - { id: 3, class: gpr32 }
-# CHECK-NEXT: - { id: 4, class: gpr64 }
-# CHECK-NEXT: - { id: 5, class: gpr64 }
-# CHECK-NEXT: - { id: 6, class: gpr64 }
-# CHECK-NEXT: - { id: 7, class: gpr64 }
-# CHECK-NEXT: - { id: 8, class: gpr64 }
-# CHECK-NEXT: - { id: 9, class: gpr64 }
+# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 7, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 8, class: gpr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 9, class: gpr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
index 3bd56fa4cebc..af0ab57b0b9f 100644
--- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
@@ -4,7 +4,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64,
i32, ...) {
; CHECK-LABEL: name: test_varargs_sentinel
; CHECK: fixedStack:
-; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], offset: 8
+; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8
; CHECK: body:
; CHECK: [[LIST:%[0-9]+]] = COPY %x0
; CHECK: [[VARARGS_AREA:%[0-9]+]] = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
new file mode 100644
index 000000000000..16a02de79a91
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -0,0 +1,131 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s
+
+; Test fptosi
+define i32 @fptosi_wh(half %a) nounwind ssp {
+entry:
+; CHECK-LABEL: fptosi_wh
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzs [[REG:w[0-9]+]], s1
+; CHECK: mov w0, [[REG]]
+ %conv = fptosi half %a to i32
+ ret i32 %conv
+}
+
+; Test fptoui
+define i32 @fptoui_swh(half %a) nounwind ssp {
+entry:
+; CHECK-LABEL: fptoui_swh
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzu [[REG:w[0-9]+]], s1
+; CHECK: mov w0, [[REG]]
+ %conv = fptoui half %a to i32
+ ret i32 %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i1
+; CHECK: sbfx w0, w0, #0, #1
+; CHECK: scvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = sitofp i1 %a to half
+ ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i8
+; CHECK: sxtb w0, w0
+; CHECK: scvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = sitofp i8 %a to half
+ ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i16
+; CHECK: sxth w0, w0
+; CHECK: scvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = sitofp i16 %a to half
+ ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i32(i32 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i32
+; CHECK: scvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = sitofp i32 %a to half
+ ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hx(i64 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hx
+; CHECK: scvtf s0, x0
+; CHECK: fcvt h0, s0
+ %conv = sitofp i64 %a to half
+ ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i1
+; CHECK: and w0, w0, #0x1
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = uitofp i1 %a to half
+ ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i8
+; CHECK: and w0, w0, #0xff
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = uitofp i8 %a to half
+ ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i16
+; CHECK: and w0, w0, #0xffff
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = uitofp i16 %a to half
+ ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i32(i32 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i32
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt h0, s0
+ %conv = uitofp i32 %a to half
+ ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hx(i64 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hx
+; CHECK: ucvtf s0, x0
+; CHECK: fcvt h0, s0
+ %conv = uitofp i64 %a to half
+ ret half %conv
+}
+
+
diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir
new file mode 100644
index 000000000000..4294df286bd3
--- /dev/null
+++ b/test/CodeGen/AArch64/spill-undef.mir
@@ -0,0 +1,67 @@
+# RUN: llc %s -run-pass greedy -o - | FileCheck %s
+# Check that we don't insert spill code for undef values.
+# Uninitialized memory for them is fine.
+# PR33311
+--- |
+ ; ModuleID = 'stuff.ll'
+ target triple = "aarch64--"
+
+ @g = external global i32
+
+ define void @foobar() {
+ ret void
+ }
+
+...
+---
+name: foobar
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+ - { id: 2, class: gpr32all }
+ - { id: 3, class: gpr32 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr64common }
+ - { id: 7, class: gpr32 }
+ - { id: 8, class: gpr32 }
+ - { id: 9, class: gpr64 }
+body: |
+ bb.0:
+ liveins: %x0
+ successors: %bb.1, %bb.2
+
+ ; %8 is going to be spilled.
+ ; But on that path, we don't care about its value.
+ ; Emit a simple KILL instruction instead of an
+ ; actual spill.
+ ; CHECK: [[UNDEF:%[0-9]+]] = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[UNDEF]]
+ %8 = IMPLICIT_DEF
+ ; %9 is going to be spilled.
+ ; But it is only partially undef.
+ ; Make sure we spill it properly
+ ; CHECK: [[NINE:%[0-9]+]] = COPY %x0
+ ; CHECK: [[NINE]].sub_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: STRXui [[NINE]]
+ %9 = COPY %x0
+ %9.sub_32 = IMPLICIT_DEF
+ CBNZW %wzr, %bb.2
+ B %bb.1
+
+ bb.1:
+ %4 = ADRP target-flags(aarch64-page) @g
+ %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g)
+ INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr
+
+ bb.2:
+ INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr
+ %6 = ADRP target-flags(aarch64-page) @g
+ %w0 = MOVi32imm 42
+ STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g)
+ STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g)
+ RET_ReallyLR implicit killed %w0
+
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
new file mode 100644
index 000000000000..ebd473d769b3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -0,0 +1,24 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+ define void @test_icmp() {
+ entry:
+ ret void
+ }
+...
+
+---
+name: test_icmp
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0.entry:
+ liveins: %vgpr0
+ %0(s32) = G_CONSTANT i32 0
+ %1(s32) = COPY %vgpr0
+
+ ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1
+ %2(s1) = G_ICMP intpred(ne), %0, %1
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
new file mode 100644
index 000000000000..d11130936bd9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -0,0 +1,28 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+ define void @test_select() { ret void }
+...
+
+---
+name: test_select
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+ - { id: 3, class: _ }
+ - { id: 4, class: _ }
+ - { id: 5, class: _ }
+body: |
+ bb.0:
+ liveins: %vgpr0
+ %0(s32) = G_CONSTANT i32 0
+ %1(s32) = COPY %vgpr0
+
+ %2(s1) = G_ICMP intpred(ne), %0, %1
+ %3(s32) = G_CONSTANT i32 1
+ %4(s32) = G_CONSTANT i32 2
+ ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4
+ %5(s32) = G_SELECT %2, %3, %4
+
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index 3496b1ab71fe..902f1e6c6725 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -24,8 +24,8 @@ legalized: true
# CHECK-LABEL: name: load_constant
# CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
body: |
bb.0:
@@ -40,8 +40,8 @@ legalized: true
# CHECK-LABEL: name: load_global_uniform
# CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
body: |
bb.0:
@@ -56,9 +56,9 @@ legalized: true
# CHECK-LABEL: name: load_global_non_uniform
# CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: vgpr }
-# CHECK: - { id: 2, class: vgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: vgpr, preferred-register: '' }
+# CHECK: - { id: 2, class: vgpr, preferred-register: '' }
body: |
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll
index e5e2d436deb0..76f724c2b90b 100644
--- a/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -84,7 +84,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -101,7 +101,7 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)*
; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -140,7 +140,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
; VI-NOT: v_add_u16
; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
-; VI: v_add_u16_sdwa v{{[0-9]+}}, v[[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_add_u16
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 7f424ef2a147..dd96e6264418 100644
--- a/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -9,7 +9,7 @@
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]]
; VI: v_ashrrev_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; CI: v_ashrrev_i32_e32
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/branch-relax-spill.ll b/test/CodeGen/AMDGPU/branch-relax-spill.ll
index ede15559c4ff..db476c21636f 100644
--- a/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -7,110 +7,110 @@
define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
entry:
- %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0
- %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0
- %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0
- %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0
- %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0
- %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0
- %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0
- %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0
- %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0
- %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0
- %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0
- %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={SGPR11}"() #0
- %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0
- %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0
- %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0
- %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0
- %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0
- %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0
- %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0
- %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0
- %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0
- %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0
- %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0
- %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0
- %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0
- %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0
- %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0
- %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0
- %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0
- %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0
- %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0
- %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0
- %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0
- %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0
- %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0
- %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0
- %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0
- %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0
- %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0
- %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0
- %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0
- %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0
- %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0
- %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0
- %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0
- %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0
- %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0
- %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0
- %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0
- %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0
- %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0
- %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0
- %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0
- %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0
- %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0
- %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0
- %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0
- %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0
- %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0
- %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0
- %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0
- %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0
- %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0
- %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0
- %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0
- %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0
- %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0
- %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0
- %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0
- %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0
- %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0
- %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0
- %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0
- %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0
- %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0
- %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0
- %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0
- %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0
- %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0
- %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0
- %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0
- %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0
- %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0
- %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0
- %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0
- %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0
- %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0
- %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0
- %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0
- %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0
- %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0
- %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0
- %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0
- %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0
- %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0
- %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0
- %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0
- %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0
- %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0
- %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0
- %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0
- %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0
- %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={SGPR102}"() #0
- %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0
+ %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
+ %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
+ %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
+ %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
+ %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
+ %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
+ %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
+ %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
+ %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
+ %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
+ %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
+ %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
+ %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
+ %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
+ %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
+ %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
+ %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
+ %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
+ %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
+ %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
+ %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
+ %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
+ %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
+ %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
+ %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
+ %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
+ %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
+ %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
+ %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
+ %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
+ %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
+ %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
+ %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
+ %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
+ %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
+ %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
+ %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
+ %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
+ %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
+ %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
+ %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
+ %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
+ %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
+ %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
+ %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
+ %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
+ %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
+ %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
+ %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
+ %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
+ %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
+ %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
+ %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
+ %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
+ %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
+ %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
+ %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
+ %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
+ %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
+ %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
+ %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
+ %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
+ %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
+ %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
+ %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
+ %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
+ %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
+ %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
+ %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
+ %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
+ %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
+ %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
+ %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
+ %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
+ %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
+ %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
+ %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
+ %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
+ %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
+ %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
+ %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
+ %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
+ %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
+ %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
+ %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
+ %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
+ %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
+ %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
+ %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
+ %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
+ %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
+ %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
+ %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
+ %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
+ %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
+ %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
+ %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
+ %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
+ %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
+ %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
+ %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
+ %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
+ %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0
+ %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0
%vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0
%vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0
%cmp = icmp eq i32 %cnd, 0
@@ -126,112 +126,112 @@ bb2: ; 28 bytes
br label %bb3
bb3:
- tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR81}"(i32 %sgpr81) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0
- tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0
- tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0
- tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0
+ tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
+ tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
+ tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
+ tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
+ tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
+ tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
+ tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
+ tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
+ tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
+ tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
+ tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
+ tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
+ tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
+ tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
+ tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
+ tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
+ tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
+ tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
+ tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
+ tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
+ tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
+ tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
+ tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
+ tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
+ tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
+ tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
+ tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
+ tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
+ tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
+ tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
+ tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
+ tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
+ tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
+ tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
+ tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
+ tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
+ tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
+ tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
+ tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
+ tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
+ tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
+ tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
+ tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
+ tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
+ tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
+ tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
+ tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
+ tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
+ tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
+ tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
+ tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
+ tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
+ tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
+ tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
+ tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
+ tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
+ tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
+ tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
+ tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
+ tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
+ tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
+ tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
+ tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
+ tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
+ tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
+ tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
+ tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
+ tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
+ tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
+ tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
+ tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
+ tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
+ tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
+ tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
+ tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
+ tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
+ tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
+ tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
+ tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
+ tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
+ tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
+ tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
+ tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
+ tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
+ tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
+ tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
+ tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
+ tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
+ tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
+ tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
+ tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
+ tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
+ tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
+ tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
+ tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
+ tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
+ tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
+ tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
+ tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
+ tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
+ tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
+ tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
+ tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0
+ tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0
+ tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
+ tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
ret void
}
diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index fbfd0fbf9308..6ecf75c1acec 100644
--- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -24,6 +24,10 @@
ret void
}
+ define amdgpu_ps void @v_max_reg_imm_f32() #0 {
+ ret void
+ }
+
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
...
@@ -422,3 +426,19 @@ body: |
S_ENDPGM
...
+---
+
+# Pass used to crash with immediate second operand of max
+name: v_max_reg_imm_f32
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+body: |
+ bb.0 (%ir-block.0):
+ liveins: %vgpr0
+
+ %0 = COPY %vgpr0
+ %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit %exec
+
+...
diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
index 207dfce75f16..13aafc24895d 100644
--- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
+++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
@@ -2,97 +2,97 @@
; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti
define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 {
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
- call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
- call void asm sideeffect "", "~{VCC}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
+ call void asm sideeffect "", "~{s[96:103]}" ()
+ call void asm sideeffect "", "~{vcc}" ()
ret void
}
; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire
define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
- call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
- call void asm sideeffect "", "~{VCC}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
+ call void asm sideeffect "", "~{s[96:103]}" ()
+ call void asm sideeffect "", "~{vcc}" ()
ret void
}
; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr
define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
- call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
- call void asm sideeffect "", "~{VCC}" ()
- call void asm sideeffect "", "~{FLAT_SCR}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
+ call void asm sideeffect "", "~{s[96:103]}" ()
+ call void asm sideeffect "", "~{vcc}" ()
+ call void asm sideeffect "", "~{flat_scratch}" ()
ret void
}
; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland
define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 {
- call void asm sideeffect "", "~{VCC}" ()
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
+ call void asm sideeffect "", "~{vcc}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
ret void
}
; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 {
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
- call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" ()
- call void asm sideeffect "", "~{SGPR100_SGPR101}" ()
- call void asm sideeffect "", "~{SGPR102}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
+ call void asm sideeffect "", "~{s[96:99]}" ()
+ call void asm sideeffect "", "~{s[100:101]}" ()
+ call void asm sideeffect "", "~{s102}" ()
ret void
}
diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll
index d4ef7124a334..4e2ec4b3054f 100644
--- a/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
; VI: flat_load_ushort [[LO:v[0-9]+]]
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]]
-; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[MASK]], [[LO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]]
; VI: flat_store_dword
@@ -60,8 +60,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]]
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -128,7 +128,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
; CI: v_cvt_f16_f32
; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]]
diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll
index 9b3d2a475a14..08199be144f4 100644
--- a/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -78,7 +78,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -108,7 +108,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
@@ -137,7 +137,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[CONST1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 9e8ddd39bbaf..404358f0ecb9 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -278,9 +278,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
}
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
-; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
+; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
+; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
; VI-NOT: v_and_b32
; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 5705cbc99443..a7664c399fbb 100644
--- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -21,7 +21,7 @@
; VI-XNACK: ; NumSgprs: 12
define amdgpu_kernel void @no_vcc_no_flat() {
entry:
- call void asm sideeffect "", "~{SGPR7}"()
+ call void asm sideeffect "", "~{s7}"()
ret void
}
@@ -35,7 +35,7 @@ entry:
; VI-XNACK: ; NumSgprs: 12
define amdgpu_kernel void @vcc_no_flat() {
entry:
- call void asm sideeffect "", "~{SGPR7},~{VCC}"()
+ call void asm sideeffect "", "~{s7},~{vcc}"()
ret void
}
@@ -52,7 +52,7 @@ entry:
; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @no_vcc_flat() {
entry:
- call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
+ call void asm sideeffect "", "~{s7},~{flat_scratch}"()
ret void
}
@@ -68,7 +68,7 @@ entry:
; HSA-VI-XNACK: ; NumSgprs: 14
define amdgpu_kernel void @vcc_flat() {
entry:
- call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
+ call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
ret void
}
@@ -81,7 +81,7 @@ entry:
; VI-XNACK: NumSgprs: 6
define amdgpu_kernel void @use_flat_scr() #0 {
entry:
- call void asm sideeffect "; clobber ", "~{FLAT_SCR}"()
+ call void asm sideeffect "; clobber ", "~{flat_scratch}"()
ret void
}
@@ -91,7 +91,7 @@ entry:
; VI-XNACK: NumSgprs: 6
define amdgpu_kernel void @use_flat_scr_lo() #0 {
entry:
- call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"()
+ call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"()
ret void
}
@@ -101,7 +101,7 @@ entry:
; VI-XNACK: NumSgprs: 6
define amdgpu_kernel void @use_flat_scr_hi() #0 {
entry:
- call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"()
+ call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"()
ret void
}
diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll
index 4ef2aa693cf4..cd86409e2038 100644
--- a/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -78,7 +78,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -105,7 +105,7 @@ entry:
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
@@ -131,7 +131,7 @@ entry:
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST3]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index c256159726bf..f4afaca2b7a7 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -73,7 +73,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; CIVI: flat_store_dword
@@ -92,9 +92,9 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x
; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
@@ -116,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h
; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0]
diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll
index 16e4fc680bea..59745a9352ce 100644
--- a/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
; CI: v_cvt_f16_f32
; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}}
diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll
index 50e56e08416a..f310618d8bdb 100644
--- a/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -66,7 +66,7 @@ entry:
; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN: buffer_store_dword v[[R_V2_I16]]
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll
index 2afa6111cf17..7641c08e33c3 100644
--- a/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -66,7 +66,7 @@ entry:
; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
; VI: v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GCN: buffer_store_dword v[[R_V2_I16]]
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll
index 836b480b6a67..fa00c06546db 100644
--- a/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -78,7 +78,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_subrev_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1]
@@ -146,7 +146,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONSTM1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
index af63a4f8df76..81d9ed2eba8c 100644
--- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -1,6 +1,12 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
@@ -15,11 +21,16 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s
; HSA: .hsa_code_object_version 2,1
+; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
+; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU"
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
+; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
@@ -28,3 +39,5 @@
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
+; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
+; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
diff --git a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
index 6e411ce5e017..0c5b8fbda222 100644
--- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -5,40 +5,40 @@
; GCN: ; illegal copy v1 to s9
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
- %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"()
- call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr)
+ %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"()
+ call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr)
ret void
}
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy
; GCN: ; illegal copy v[0:1] to s[10:11]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
- %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"()
- call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr)
+ %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"()
+ call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
ret void
}
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy
; GCN: ; illegal copy v[0:3] to s[8:11]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
- %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"()
- call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr)
+ %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"()
+ call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr)
ret void
}
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy
; GCN: ; illegal copy v[0:7] to s[8:15]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
- %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"()
- call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr)
+ %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"()
+ call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr)
ret void
}
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy
; GCN: ; illegal copy v[0:15] to s[16:31]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
- %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"()
- call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr)
+ %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"()
+ call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr)
ret void
}
diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll
index bc951a82becd..cd3502baee7b 100644
--- a/test/CodeGen/AMDGPU/immv216.ll
+++ b/test/CodeGen/AMDGPU/immv216.ll
@@ -124,7 +124,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST0]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -142,7 +142,7 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -160,7 +160,7 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -178,7 +178,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -196,7 +196,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -214,7 +214,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -232,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -250,7 +250,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -268,7 +268,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -285,7 +285,7 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)*
; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI: buffer_load_dword
; VI-NOT: and
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
@@ -306,7 +306,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
@@ -325,7 +325,7 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -343,7 +343,7 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -361,7 +361,7 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -379,7 +379,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -397,7 +397,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -415,7 +415,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -433,7 +433,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)*
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST63]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -451,7 +451,7 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out
; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]]
; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST64]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index fab1f8d12253..0d20c32a4770 100644
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -379,7 +379,7 @@ entry:
%idx0 = load volatile i32, i32 addrspace(1)* %gep
%idx1 = add i32 %idx0, 1
%val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0
- %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" ()
+ %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
%val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1
store volatile i32 %val0, i32 addrspace(1)* %out0
store volatile i32 %val1, i32 addrspace(1)* %out0
diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll
index 36441cf778c2..c0f5218efc16 100644
--- a/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/test/CodeGen/AMDGPU/inline-asm.ll
@@ -193,7 +193,7 @@ entry:
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
- call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456)
+ call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
ret void
}
@@ -202,7 +202,7 @@ entry:
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
- call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true)
+ call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
ret void
}
@@ -215,7 +215,7 @@ entry:
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
%val = load i1, i1 addrspace(1)* undef
- call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val)
+ call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
ret void
}
@@ -229,7 +229,7 @@ define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
%val0 = load volatile i1, i1 addrspace(1)* undef
%val1 = load volatile i1, i1 addrspace(1)* undef
- call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1)
+ call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
ret void
}
@@ -240,8 +240,8 @@ entry:
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
- %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
- %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
+ %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
+ %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
%add = shl i32 %def0, %def1
store i32 %add, i32 addrspace(1)* undef
ret void
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 1edccff3bf15..86fc41a23772 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -261,7 +261,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace
; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -285,7 +285,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out,
; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
@@ -345,7 +345,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac
; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -369,7 +369,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out
; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir
index a0d2d6c097a2..7d6d8a5891cd 100644
--- a/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/test/CodeGen/AMDGPU/limit-coalesce.mir
@@ -2,13 +2,13 @@
# Check that coalescer does not create wider register tuple than in source
-# CHECK: - { id: 2, class: vreg_64 }
-# CHECK: - { id: 3, class: vreg_64 }
-# CHECK: - { id: 4, class: vreg_64 }
-# CHECK: - { id: 5, class: vreg_96 }
-# CHECK: - { id: 6, class: vreg_96 }
-# CHECK: - { id: 7, class: vreg_128 }
-# CHECK: - { id: 8, class: vreg_128 }
+# CHECK: - { id: 2, class: vreg_64, preferred-register: '' }
+# CHECK: - { id: 3, class: vreg_64, preferred-register: '' }
+# CHECK: - { id: 4, class: vreg_64, preferred-register: '' }
+# CHECK: - { id: 5, class: vreg_96, preferred-register: '' }
+# CHECK: - { id: 6, class: vreg_96, preferred-register: '' }
+# CHECK: - { id: 7, class: vreg_128, preferred-register: '' }
+# CHECK: - { id: 8, class: vreg_128, preferred-register: '' }
# No more registers shall be defined
# CHECK-NEXT: liveins:
# CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4,
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll
new file mode 100644
index 000000000000..873a3f0f368f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_alignbit_b32:
+; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
+define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
+ %val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_alignbyte_b32:
+; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
+define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
+ %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 3a2b87cd87f3..83bc8b234724 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,18 +4,28 @@
declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v5, v1
+; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
- %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
- store i64 %result, i64 addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
+ %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
+ %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+ store i64 %tmp2, i64 addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
+; GCN-DAG: v_mov_b32_e32 v3, v1
+; GCN-DAG: v_mov_b32_e32 v2, v0
define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
- %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
- store i64 %result, i64 addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
+ %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
+ %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+ store i64 %tmp4, i64 addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
index a8d03bf6bbac..685b5e0f29c4 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -3,45 +3,56 @@
declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
-; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) {
- %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> <i32 100, i32 100, i32 100, i32 100>) #0
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
+define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {
- %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
- ret void
-}
-
-; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
- %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
- %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) {
%in = load <4 x i32>, <4 x i32> addrspace(1) * %input
-
- %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index be71225c5e06..1f46613a8db0 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,18 +4,28 @@
declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v5, v1
+; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
- %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
- store i64 %result, i64 addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
+ %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
+ %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+ store i64 %tmp2, i64 addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
+; GCN-DAG: v_mov_b32_e32 v3, v1
+; GCN-DAG: v_mov_b32_e32 v2, v0
define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
- %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
- store i64 %result, i64 addrspace(1)* %out, align 4
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+ %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
+ %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
+ %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+ store i64 %tmp4, i64 addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index eec187390169..806723e5136c 100644
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -118,7 +118,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
; VI-FLUSH-NOT: v_and_b32
diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index a4353d1136e1..8f4b314ffabb 100644
--- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -82,7 +82,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
@@ -110,7 +110,7 @@ entry:
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
@@ -138,7 +138,7 @@ entry:
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
-; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 4875d26fc860..1a86286f7136 100644
--- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -81,7 +81,7 @@ entry:
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
@@ -111,7 +111,7 @@ entry:
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
@@ -139,7 +139,7 @@ entry:
; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
-; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 77d793201adc..49f00e9447da 100644
--- a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -608,11 +608,11 @@ ret:
; GCN: ;;#ASMSTART
; GCN: ; use s[0:1]
define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
- call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0
- call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0
- call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0
- call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0
- call void asm sideeffect "", "~{VGPR22}"() #0
+ call void asm sideeffect "", "~{v[0:7]}" () #0
+ call void asm sideeffect "", "~{v[8:15]}" () #0
+ call void asm sideeffect "", "~{v[16:19]}"() #0
+ call void asm sideeffect "", "~{v[20:21]}"() #0
+ call void asm sideeffect "", "~{v22}"() #0
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
new file mode 100644
index 000000000000..5b2da788a405
--- /dev/null
+++ b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s
+
+; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
+; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case because it is
+; non-canonical, but the pass should handle it gracefully if it does encounter it.
+; The checks look for the (non-canonical) lines that previously caused issues in PromoteAlloca;
+; opt should now leave them unchanged.
+
+; OPT-LABEL: @promote_1d_aggr(
+; OPT: store [1 x float] %tmp3, [1 x float]* %f1
+
+%Block = type { [1 x float], i32 }
+%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
+
+@block = external addrspace(1) global %Block
+@pv = external addrspace(1) global %gl_PerVertex
+
+define amdgpu_vs void @promote_1d_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [1 x float]
+ %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
+ %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2
+ store [1 x float] %tmp3, [1 x float]* %f1
+ %tmp4 = load i32, i32* %i
+ %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4
+ %tmp6 = load float, float* %tmp5
+ %tmp7 = alloca <4 x float>
+ %tmp8 = load <4 x float>, <4 x float>* %tmp7
+ %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+ %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+ ret void
+}
+
+
+; OPT-LABEL: @promote_store_aggr(
+; OPT: %tmp6 = load [2 x float], [2 x float]* %f1
+
+%Block2 = type { i32, [2 x float] }
+@block2 = external addrspace(1) global %Block2
+
+define amdgpu_vs void @promote_store_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [2 x float]
+ %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = load i32, i32* %i
+ %tmp3 = sitofp i32 %tmp2 to float
+ %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
+ store float %tmp3, float* %tmp4
+ %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
+ store float 2.000000e+00, float* %tmp5
+ %tmp6 = load [2 x float], [2 x float]* %f1
+ %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
+ store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7
+ %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %tmp8
+ ret void
+}
+
+; OPT-LABEL: @promote_load_from_store_aggr(
+; OPT: store [2 x float] %tmp3, [2 x float]* %f1
+
+%Block3 = type { [2 x float], i32 }
+@block3 = external addrspace(1) global %Block3
+
+define amdgpu_vs void @promote_load_from_store_aggr() #0 {
+ %i = alloca i32
+ %f1 = alloca [2 x float]
+ %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(1)* %tmp
+ store i32 %tmp1, i32* %i
+ %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
+ %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2
+ store [2 x float] %tmp3, [2 x float]* %f1
+ %tmp4 = load i32, i32* %i
+ %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4
+ %tmp6 = load float, float* %tmp5
+ %tmp7 = alloca <4 x float>
+ %tmp8 = load <4 x float>, <4 x float>* %tmp7
+ %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+ %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+ store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+ ret void
+}
+
+; OPT-LABEL: @promote_double_aggr(
+; OPT: store [2 x double] %tmp5, [2 x double]* %s
+
+@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
+@frag_color = external addrspace(1) global <4 x float>
+
+define amdgpu_ps void @promote_double_aggr() #0 {
+ %s = alloca [2 x double]
+ %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
+ %tmp1 = load double, double addrspace(1)* %tmp
+ %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
+ %tmp3 = load double, double addrspace(1)* %tmp2
+ %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0
+ %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1
+ store [2 x double] %tmp5, [2 x double]* %s
+ %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp7 = load double, double* %tmp6
+ %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp9 = load double, double* %tmp8
+ %tmp10 = fadd double %tmp7, %tmp9
+ %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+ store double %tmp10, double* %tmp11
+ %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+ %tmp13 = load double, double* %tmp12
+ %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+ %tmp15 = load double, double* %tmp14
+ %tmp16 = fadd double %tmp13, %tmp15
+ %tmp17 = fptrunc double %tmp16 to float
+ %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0
+ %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1
+ %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2
+ %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3
+ store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
new file mode 100644
index 000000000000..1a0d68d81f97
--- /dev/null
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
@@ -0,0 +1,69 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
+---
+
+# GCN-LABEL: name: mac_invalid_operands
+# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
+
+name: mac_invalid_operands
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+ - { id: 2, class: sgpr_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: sreg_64 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vreg_64 }
+ - { id: 11, class: vreg_64 }
+ - { id: 12, class: vreg_128 }
+ - { id: 13, class: vreg_128 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vreg_64 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vreg_128 }
+body: |
+ bb.0:
+ successors: %bb.2, %bb.1
+
+ %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
+ %vcc = COPY killed %7
+ S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
+
+ bb.1:
+ successors: %bb.3
+
+ %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec
+ undef %12.sub0 = COPY killed %4
+ %17 = COPY killed %12
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec
+ undef %13.sub0 = COPY %8
+ %13.sub1 = COPY %8
+ %13.sub2 = COPY killed %8
+ %0 = COPY killed %13
+ %17 = COPY killed %0
+
+ bb.3:
+ %1 = COPY killed %17
+ FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
+ %14 = COPY %1.sub1
+ %16 = COPY killed %1.sub0
+ undef %15.sub0 = COPY killed %16
+ %15.sub1 = COPY killed %14
+ FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
+ S_ENDPGM
+
+...
diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll
index 6ed730ad60f4..abd15f1fb47f 100644
--- a/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -12,8 +12,10 @@
; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
+; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200
+; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]]
; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll
index a319edfc5ace..66e166d283f7 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -74,7 +74,7 @@ entry:
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
entry:
@@ -97,8 +97,8 @@ entry:
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
entry:
@@ -125,10 +125,10 @@ entry:
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL7]], v[[DST_MUL6]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL5]], v[[DST_MUL4]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
entry:
@@ -347,8 +347,8 @@ entry:
; NOSDWA-NOT: v_mul_u32_u24_sdwa
; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141
; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M123]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M321]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M123]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
@@ -367,7 +367,7 @@ entry:
; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; NOSDWA-NOT: v_mul_u32_u24_sdwa
-; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
entry:
@@ -408,9 +408,9 @@ store_label:
; NOSDWA-NOT: v_and_b32_sdwa
; NOSDWA-NOT: v_or_b32_sdwa
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll
index 115221c5316d..839854fd575b 100644
--- a/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -10,7 +10,7 @@
; VI: v_lshlrev_b32_e32
; VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; CI: v_lshlrev_b32_e32
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index 114c97b61bd4..a57e7b13453f 100644
--- a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -25,50 +25,50 @@
; SMEM: s_dcache_wb
; ALL: s_endpgm
define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
- call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
- call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
- call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
- call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
- call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
- call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
- call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
- call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
- call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
- call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
- call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
- call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
- call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" ()
- call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" ()
- call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" ()
- call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" ()
- call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" ()
- call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" ()
- call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" ()
- call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" ()
- call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" ()
- call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" ()
- call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" ()
- call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" ()
- call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" ()
- call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" ()
- call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" ()
- call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" ()
- call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" ()
- call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" ()
- call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" ()
- call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" ()
- call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" ()
- call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" ()
- call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" ()
- call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" ()
- call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" ()
- call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" ()
- call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" ()
- call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" ()
- call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" ()
- call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" ()
- call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" ()
- call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" ()
+ call void asm sideeffect "", "~{s[0:7]}" ()
+ call void asm sideeffect "", "~{s[8:15]}" ()
+ call void asm sideeffect "", "~{s[16:23]}" ()
+ call void asm sideeffect "", "~{s[24:31]}" ()
+ call void asm sideeffect "", "~{s[32:39]}" ()
+ call void asm sideeffect "", "~{s[40:47]}" ()
+ call void asm sideeffect "", "~{s[48:55]}" ()
+ call void asm sideeffect "", "~{s[56:63]}" ()
+ call void asm sideeffect "", "~{s[64:71]}" ()
+ call void asm sideeffect "", "~{s[72:79]}" ()
+ call void asm sideeffect "", "~{s[80:87]}" ()
+ call void asm sideeffect "", "~{s[88:95]}" ()
+ call void asm sideeffect "", "~{v[0:7]}" ()
+ call void asm sideeffect "", "~{v[8:15]}" ()
+ call void asm sideeffect "", "~{v[16:23]}" ()
+ call void asm sideeffect "", "~{v[24:31]}" ()
+ call void asm sideeffect "", "~{v[32:39]}" ()
+ call void asm sideeffect "", "~{v[40:47]}" ()
+ call void asm sideeffect "", "~{v[48:55]}" ()
+ call void asm sideeffect "", "~{v[56:63]}" ()
+ call void asm sideeffect "", "~{v[64:71]}" ()
+ call void asm sideeffect "", "~{v[72:79]}" ()
+ call void asm sideeffect "", "~{v[80:87]}" ()
+ call void asm sideeffect "", "~{v[88:95]}" ()
+ call void asm sideeffect "", "~{v[96:103]}" ()
+ call void asm sideeffect "", "~{v[104:111]}" ()
+ call void asm sideeffect "", "~{v[112:119]}" ()
+ call void asm sideeffect "", "~{v[120:127]}" ()
+ call void asm sideeffect "", "~{v[128:135]}" ()
+ call void asm sideeffect "", "~{v[136:143]}" ()
+ call void asm sideeffect "", "~{v[144:151]}" ()
+ call void asm sideeffect "", "~{v[152:159]}" ()
+ call void asm sideeffect "", "~{v[160:167]}" ()
+ call void asm sideeffect "", "~{v[168:175]}" ()
+ call void asm sideeffect "", "~{v[176:183]}" ()
+ call void asm sideeffect "", "~{v[184:191]}" ()
+ call void asm sideeffect "", "~{v[192:199]}" ()
+ call void asm sideeffect "", "~{v[200:207]}" ()
+ call void asm sideeffect "", "~{v[208:215]}" ()
+ call void asm sideeffect "", "~{v[216:223]}" ()
+ call void asm sideeffect "", "~{v[224:231]}" ()
+ call void asm sideeffect "", "~{v[232:239]}" ()
+ call void asm sideeffect "", "~{v[240:247]}" ()
+ call void asm sideeffect "", "~{v[248:255]}" ()
store i32 %in, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll
index 3f53572ab440..ea8b87f1dee2 100644
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -79,7 +79,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
call void @llvm.AMDGPU.kill(float %x)
- %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
+ %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
call void @llvm.AMDGPU.kill(float %y)
ret void
}
@@ -128,7 +128,7 @@ bb:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", "={VGPR7}"()
+ v_nop_e64", "={v7}"()
call void @llvm.AMDGPU.kill(float %var)
br label %exit
@@ -186,11 +186,11 @@ bb:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", "={VGPR7}"()
- %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
+ v_nop_e64", "={v7}"()
+ %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
call void @llvm.AMDGPU.kill(float %var)
store volatile float %live.across, float addrspace(1)* undef
- %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
+ %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
br label %exit
exit:
@@ -242,7 +242,7 @@ bb:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", "={VGPR7}"()
+ v_nop_e64", "={v7}"()
call void @llvm.AMDGPU.kill(float %var)
%vgpr = load volatile i32, i32 addrspace(1)* undef
%loop.cond = icmp eq i32 %vgpr, 0
diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 5d71ad2c8ba3..a9aac2d8abb7 100644
--- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -10,11 +10,11 @@
; VI: v_sub_i32_e32
; VI-DAG: v_sub_i32_e32
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; VI: v_add_i32_e32
; VI: v_add_i32_e32
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; CI: v_sub_i32_e32
; CI-DAG: v_sub_i32_e32
@@ -47,7 +47,7 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[TWO]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_and_b32
; VI: v_or_b32_e32
define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index c05021a91ff0..a23461a0a514 100644
--- a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -20,13 +20,13 @@ entry:
%a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr
; mark most VGPR registers as used to increase register pressure
- call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" ()
- call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" ()
- call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" ()
- call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" ()
- call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" ()
- call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" ()
- call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" ()
+ call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" ()
+ call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" ()
+ call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" ()
+ call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" ()
+ call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" ()
+ call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" ()
+ call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" ()
%outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid
store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr
diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll
index 6aeff3fc3b6c..ee923e2b8b61 100644
--- a/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}v_test_sub_v2i16:
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -62,7 +62,7 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -80,7 +80,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}}
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}}
define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)*
; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[ONE]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]]
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -137,7 +137,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
; VI-NOT: v_subrev_i16
; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_subrev_i16
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -252,7 +252,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)
; GFX9: v_pk_sub_i16
; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_subrev_u16_e32
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 3e80fcf85b52..1e08f51dabde 100644
--- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -73,14 +73,14 @@ bb11: ; preds = %bb9
; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
define amdgpu_kernel void @partially_undef_copy() #0 {
- %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"()
- %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"()
+ %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"()
+ %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"()
%partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
%partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0
store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16
- tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0)
+ tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0)
ret void
}
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll
index 3da1a0324042..ce4a69db3506 100644
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -304,14 +304,14 @@ entry:
; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
-; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
-; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
+; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
+; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
@@ -320,12 +320,12 @@ entry:
; VI-NOT: and
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
-; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
+; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
+; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; VI-NOT: and
-; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]]
+; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]]
; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
@@ -336,7 +336,9 @@ define amdgpu_kernel void @mac_v2f16(
<2 x half> addrspace(1)* %c) #0 {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+ call void @llvm.amdgcn.s.barrier() #2
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+ call void @llvm.amdgcn.s.barrier() #2
%c.val = load <2 x half>, <2 x half> addrspace(1)* %c
%t.val = fmul <2 x half> %a.val, %b.val
@@ -485,7 +487,7 @@ entry:
; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; GCN: s_endpgm
@@ -517,7 +519,7 @@ entry:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; GCN: s_endpgm
@@ -670,5 +672,8 @@ entry:
ret void
}
+declare void @llvm.amdgcn.s.barrier() #2
+
attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
new file mode 100644
index 000000000000..d7f208d4cf59
--- /dev/null
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -0,0 +1,149 @@
+# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+ define void @test_mla() #0 { ret void }
+ define void @test_mla_v5() #1 { ret void }
+
+ define void @test_mls() #2 { ret void }
+ define void @test_no_mls() { ret void }
+
+ attributes #0 = { "target-features"="+v6" }
+ attributes #1 = { "target-features"="-v6" }
+ attributes #2 = { "target-features"="+v6t2" }
+...
+---
+name: test_mla
+# CHECK-LABEL: name: test_mla
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+ ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+ %3(s32) = G_MUL %0, %1
+ %4(s32) = G_ADD %3, %2
+ ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+
+ %r0 = COPY %4(s32)
+ ; CHECK: %r0 = COPY [[VREGR]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mla_v5
+# CHECK-LABEL: name: test_mla_v5
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+ ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+ %3(s32) = G_MUL %0, %1
+ %4(s32) = G_ADD %3, %2
+ ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+
+ %r0 = COPY %4(s32)
+ ; CHECK: %r0 = COPY [[VREGR]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_mls
+# CHECK-LABEL: name: test_mls
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+ ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+ %3(s32) = G_MUL %0, %1
+ %4(s32) = G_SUB %2, %3
+ ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _
+
+ %r0 = COPY %4(s32)
+ ; CHECK: %r0 = COPY [[VREGR]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_no_mls
+# CHECK-LABEL: name: test_no_mls
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+ - { id: 3, class: gprb }
+ - { id: 4, class: gprb }
+body: |
+ bb.0:
+ liveins: %r0, %r1, %r2
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = COPY %r2
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+ ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+ %3(s32) = G_MUL %0, %1
+ %4(s32) = G_SUB %2, %3
+ ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _
+ ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _
+
+ %r0 = COPY %4(s32)
+ ; CHECK: %r0 = COPY [[VREGR]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 72c3b715d36e..16642d85d9cf 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -28,6 +28,10 @@
define void @test_sdiv_s32() #2 { ret void }
define void @test_udiv_s32() #2 { ret void }
+ define void @test_and_s32() { ret void }
+ define void @test_or_s32() { ret void }
+ define void @test_xor_s32() { ret void }
+
define void @test_load_from_stack() { ret void }
define void @test_load_f32() #0 { ret void }
define void @test_load_f64() #0 { ret void }
@@ -783,6 +787,105 @@ body: |
; CHECK: BX_RET 14, _, implicit %r0
...
---
+name: test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_AND %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = ANDrr [[VREGX]], [[VREGY]], 14, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_OR %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = ORRrr [[VREGX]], [[VREGY]], 14, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name: test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized: true
+regBankSelected: true
+selected: false
+# CHECK: selected: true
+registers:
+ - { id: 0, class: gprb }
+ - { id: 1, class: gprb }
+ - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+ %1(s32) = COPY %r1
+ ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+ %2(s32) = G_XOR %0, %1
+ ; CHECK: [[VREGRES:%[0-9]+]] = EORrr [[VREGX]], [[VREGY]], 14, _
+
+ %r0 = COPY %2(s32)
+ ; CHECK: %r0 = COPY [[VREGRES]]
+
+ BX_RET 14, _, implicit %r0
+ ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
name: test_load_from_stack
# CHECK-LABEL: name: test_load_from_stack
legalized: true
@@ -802,8 +905,8 @@ fixedStack:
- { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false }
- { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0
-# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8
+# CHECK-DAG: id: [[FI1:[0-9]+]], type: default, offset: 0, size: 1
+# CHECK-DAG: id: [[FI32:[0-9]+]], type: default, offset: 8
body: |
bb.0:
liveins: %r0, %r1, %r2, %r3
@@ -1024,13 +1127,11 @@ body: |
%1(s32) = COPY %r3
; CHECK: [[IN2:%[0-9]+]] = COPY %r3
- %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1
+ %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]]
- %3(s32) = G_EXTRACT %2(s64), 0
- %4(s32) = G_EXTRACT %2(s64), 32
- ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0
- ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1
+ %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
+ ; CHECK: [[OUT1:%[0-9]+]], [[OUT2:%[0-9]+]] = VMOVRRD %[[DREG]]
%r0 = COPY %3
; CHECK: %r0 = COPY [[OUT1]]
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 1c7769894a27..05902c22fb98 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
-; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
+; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
define void @test_void_return() {
; CHECK-LABEL: name: test_void_return
@@ -329,15 +329,13 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2
; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32
-; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32
+; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32)
+; BIG: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32)
; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8
; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
; CHECK-DAG: %r0 = COPY [[VREGVLO]]
; CHECK-DAG: %r1 = COPY [[VREGVHI]]
; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -376,15 +374,13 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0,
; CHECK: liveins: %r0, %r2, %r3
; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2
; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
-; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32)
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32)
; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
; CHECK-DAG: %r0 = COPY [[VREGVLO]]
; CHECK-DAG: %r1 = COPY [[VREGVHI]]
; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -401,15 +397,13 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler,
; CHECK: liveins: %r0, %r1, %r2
; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
-; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32)
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32)
; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
; CHECK-DAG: %r0 = COPY [[VREGVLO]]
; CHECK-DAG: %r1 = COPY [[VREGVHI]]
; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -420,9 +414,11 @@ entry:
define arm_aapcscc void @test_indirect_call(void() *%fptr) {
; CHECK-LABEL: name: test_indirect_call
-; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0
+; CHECK: registers:
+; CHECK-NEXT: id: [[FPTR:[0-9]+]], class: gpr
+; CHECK: %[[FPTR]](p0) = COPY %r0
; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
entry:
notail call arm_aapcscc void %fptr()
@@ -566,13 +562,12 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
; CHECK-LABEL: name: test_call_aapcs_fp_params
; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1
-; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32
-; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32
+; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32)
+; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32)
; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2
; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp
; CHECK-DAG: %r0 = COPY [[BVREG]]
-; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0
-; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32
+; CHECK-DAG: [[A1:%[0-9]+]](s32), [[A2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[AVREG]](s64)
; LITTLE-DAG: %r2 = COPY [[A1]]
; LITTLE-DAG: %r3 = COPY [[A2]]
; BIG-DAG: %r2 = COPY [[A2]]
@@ -588,11 +583,10 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32
-; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32
+; LITTLE: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32)
+; BIG: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32)
; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0
-; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32
+; CHECK: [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RVREG]](s64)
; LITTLE-DAG: %r0 = COPY [[R1]]
; LITTLE-DAG: %r1 = COPY [[R2]]
; BIG-DAG: %r0 = COPY [[R2]]
@@ -702,8 +696,8 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) {
; CHECK: fixedStack:
; The parameters live in separate stack locations, one for each element that
; doesn't fit in the registers.
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 60, size: 4
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 60, size: 4
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1
@@ -755,16 +749,16 @@ declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double])
define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
; CHECK-LABEL: name: test_fp_arrays_aapcs
; CHECK: fixedStack:
-; CHECK: id: [[ARR2_ID:[0-9]+]], offset: 0, size: 8
+; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8,
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK: [[ARR0_0:%[0-9]+]](s32) = COPY %r0
; CHECK: [[ARR0_1:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_0]](s32), 0, [[ARR0_1]](s32), 32
-; BIG: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_1]](s32), 0, [[ARR0_0]](s32), 32
+; LITTLE: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32)
+; BIG: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32)
; CHECK: [[ARR1_0:%[0-9]+]](s32) = COPY %r2
; CHECK: [[ARR1_1:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_0]](s32), 0, [[ARR1_1]](s32), 32
-; BIG: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_1]](s32), 0, [[ARR1_0]](s32), 32
+; LITTLE: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32)
+; BIG: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32)
; CHECK: [[ARR2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]]
; CHECK: [[ARR2:%[0-9]+]](s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]]
; CHECK: [[ARR_MERGED_0:%[0-9]+]](s192) = IMPLICIT_DEF
@@ -776,14 +770,12 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
; CHECK: [[ARR0:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 0
; CHECK: [[ARR1:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 64
; CHECK: [[ARR2:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 128
-; CHECK: [[ARR0_0:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 0
-; CHECK: [[ARR0_1:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 32
+; CHECK: [[ARR0_0:%[0-9]+]](s32), [[ARR0_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR0]](s64)
; LITTLE: %r0 = COPY [[ARR0_0]](s32)
; LITTLE: %r1 = COPY [[ARR0_1]](s32)
; BIG: %r0 = COPY [[ARR0_1]](s32)
; BIG: %r1 = COPY [[ARR0_0]](s32)
-; CHECK: [[ARR1_0:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 0
-; CHECK: [[ARR1_1:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 32
+; CHECK: [[ARR1_0:%[0-9]+]](s32), [[ARR1_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR1]](s64)
; LITTLE: %r2 = COPY [[ARR1_0]](s32)
; LITTLE: %r3 = COPY [[ARR1_1]](s32)
; BIG: %r2 = COPY [[ARR1_1]](s32)
@@ -815,10 +807,10 @@ declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3
define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) {
; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp
; CHECK: fixedStack:
-; CHECK-DAG: id: [[Z0_ID:[0-9]+]], offset: 0, size: 8
-; CHECK-DAG: id: [[Z1_ID:[0-9]+]], offset: 8, size: 8
-; CHECK-DAG: id: [[Z2_ID:[0-9]+]], offset: 16, size: 8
-; CHECK-DAG: id: [[Z3_ID:[0-9]+]], offset: 24, size: 8
+; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8,
+; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8,
+; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8,
+; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8,
; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8
; CHECK: [[X0:%[0-9]+]](s64) = COPY %d0
; CHECK: [[X1:%[0-9]+]](s64) = COPY %d1
@@ -916,8 +908,8 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) {
; CHECK: fixedStack:
; The parameters live in separate stack locations, one for each element that
; doesn't fit in the registers.
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 76, size: 4
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 76, size: 4
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1
@@ -979,8 +971,8 @@ declare arm_aapcscc {i32, i32} @structs_target({i32, i32}, {i32*, float, i32, do
define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x, {i32*, float, i32, double} %y) {
; CHECK-LABEL: test_structs
; CHECK: fixedStack:
-; CHECK-DAG: id: [[Y2_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[Y3_ID:[0-9]+]], offset: 8, size: 8
+; CHECK-DAG: id: [[Y2_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[Y3_ID:[0-9]+]], type: default, offset: 8, size: 8,
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 57ccff90c0bb..6ddc29a3bbba 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -153,6 +153,87 @@ entry:
ret i32 %sum
}
+define i8 @test_and_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_and_i8:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = and i8 %x, %y
+ ret i8 %sum
+}
+
+define i16 @test_and_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_and_i16:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = and i16 %x, %y
+ ret i16 %sum
+}
+
+define i32 @test_and_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_and_i32:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = and i32 %x, %y
+ ret i32 %sum
+}
+
+define i8 @test_or_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_or_i8:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = or i8 %x, %y
+ ret i8 %sum
+}
+
+define i16 @test_or_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_or_i16:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = or i16 %x, %y
+ ret i16 %sum
+}
+
+define i32 @test_or_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_or_i32:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = or i32 %x, %y
+ ret i32 %sum
+}
+
+define i8 @test_xor_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_xor_i8:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = xor i8 %x, %y
+ ret i8 %sum
+}
+
+define i16 @test_xor_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_xor_i16:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = xor i16 %x, %y
+ ret i16 %sum
+}
+
+define i32 @test_xor_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_xor_i32:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+ %sum = xor i32 %x, %y
+ ret i32 %sum
+}
+
define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: test_stack_args_i32:
; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
@@ -272,8 +353,7 @@ define arm_aapcscc double @test_double_softfp(double %f0, double %f1) {
; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1
; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3
; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]]
-; CHECK: vmov.32 r0, [[FV]][0]
-; CHECK: vmov.32 r1, [[FV]][1]
+; CHECK: vmov r0, r1, [[FV]]
; CHECK: bx lr
entry:
%v = fadd double %f0, %f1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
index d154b4887c19..803135ba595e 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
@@ -82,10 +82,10 @@ body: |
%1(s32) = COPY %r1
%2(s32) = COPY %r2
%3(s32) = COPY %r3
- ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
- ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
- %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
- %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+ %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+ %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
; CHECK: ADJCALLSTACKDOWN
; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
@@ -97,8 +97,7 @@ body: |
; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
; CHECK: ADJCALLSTACKUP
%6(s64) = G_FREM %4, %5
- %7(s32) = G_EXTRACT %6(s64), 0
- %8(s32) = G_EXTRACT %6(s64), 32
+ %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
%r0 = COPY %7(s32)
%r1 = COPY %8(s32)
BX_RET 14, _, implicit %r0, implicit %r1
@@ -174,10 +173,10 @@ body: |
%1(s32) = COPY %r1
%2(s32) = COPY %r2
%3(s32) = COPY %r3
- ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
- ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
- %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
- %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+ %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+ %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
; CHECK: ADJCALLSTACKDOWN
; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
@@ -189,8 +188,7 @@ body: |
; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
; CHECK: ADJCALLSTACKUP
%6(s64) = G_FPOW %4, %5
- %7(s32) = G_EXTRACT %6(s64), 0
- %8(s32) = G_EXTRACT %6(s64), 32
+ %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
%r0 = COPY %7(s32)
%r1 = COPY %8(s32)
BX_RET 14, _, implicit %r0, implicit %r1
@@ -258,10 +256,10 @@ body: |
%1(s32) = COPY %r1
%2(s32) = COPY %r2
%3(s32) = COPY %r3
- ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
- ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
- %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
- %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+ ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+ ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+ %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+ %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]]
; SOFT: ADJCALLSTACKDOWN
; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
@@ -272,10 +270,8 @@ body: |
; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
; SOFT: ADJCALLSTACKUP
%6(s64) = G_FADD %4, %5
- ; HARD-DAG: G_EXTRACT [[R]](s64), 0
- ; HARD-DAG: G_EXTRACT [[R]](s64), 32
- %7(s32) = G_EXTRACT %6(s64), 0
- %8(s32) = G_EXTRACT %6(s64), 32
+ ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64)
+ %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64)
%r0 = COPY %7(s32)
%r1 = COPY %8(s32)
BX_RET 14, _, implicit %r0, implicit %r1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index f6ac92597cb2..c6f6ca81c279 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -15,6 +15,18 @@
define void @test_mul_s16() { ret void }
define void @test_mul_s32() { ret void }
+ define void @test_and_s8() { ret void }
+ define void @test_and_s16() { ret void }
+ define void @test_and_s32() { ret void }
+
+ define void @test_or_s8() { ret void }
+ define void @test_or_s16() { ret void }
+ define void @test_or_s32() { ret void }
+
+ define void @test_xor_s8() { ret void }
+ define void @test_xor_s16() { ret void }
+ define void @test_xor_s32() { ret void }
+
define void @test_load_from_stack() { ret void }
define void @test_legal_loads() #0 { ret void }
define void @test_legal_stores() #0 { ret void }
@@ -301,6 +313,234 @@ body: |
...
---
+name: test_and_s8
+# CHECK-LABEL: name: test_and_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_AND %0, %1
+ ; G_AND with s8 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_and_s16
+# CHECK-LABEL: name: test_and_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_AND %0, %1
+ ; G_AND with s16 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_AND %0, %1
+ ; G_AND with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_or_s8
+# CHECK-LABEL: name: test_or_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_OR %0, %1
+ ; G_OR with s8 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_or_s16
+# CHECK-LABEL: name: test_or_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_OR %0, %1
+ ; G_OR with s16 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_OR %0, %1
+ ; G_OR with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_xor_s8
+# CHECK-LABEL: name: test_xor_s8
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s8) = COPY %r0
+ %1(s8) = COPY %r1
+ %2(s8) = G_XOR %0, %1
+ ; G_XOR with s8 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s8)
+ BX_RET 14, _, implicit %r0
+...
+---
+name: test_xor_s16
+# CHECK-LABEL: name: test_xor_s16
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s16) = COPY %r0
+ %1(s16) = COPY %r1
+ %2(s16) = G_XOR %0, %1
+ ; G_XOR with s16 should widen
+ ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+ ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s16)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized: false
+# CHECK: legalized: true
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_XOR %0, %1
+ ; G_XOR with s32 is legal, so we should find it unchanged in the output
+ ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
name: test_load_from_stack
# CHECK-LABEL: name: test_load_from_stack
legalized: false
@@ -317,7 +557,7 @@ fixedStack:
- { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- # CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8
+ # CHECK: id: [[FRAME_INDEX:[0-9]+]], type: default, offset: 8
body: |
bb.0:
liveins: %r0, %r1, %r2, %r3
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index dfccc47c277c..cc1df80c6019 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -16,6 +16,10 @@
define void @test_sdiv_s32() #1 { ret void }
define void @test_udiv_s32() #1 { ret void }
+ define void @test_and_s32() { ret void}
+ define void @test_or_s32() { ret void}
+ define void @test_xor_s32() { ret void}
+
define void @test_loads() #0 { ret void }
define void @test_stores() #0 { ret void }
@@ -45,9 +49,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -71,12 +75,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -106,12 +110,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -141,12 +145,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -176,9 +180,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -202,12 +206,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -237,12 +241,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -272,9 +276,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -298,12 +302,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -333,12 +337,12 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -368,9 +372,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -394,9 +398,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -414,19 +418,97 @@ body: |
...
---
+name: test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_AND %0, %1
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_OR %0, %1
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
+name: test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized: true
+regBankSelected: false
+selected: false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+body: |
+ bb.0:
+ liveins: %r0, %r1
+
+ %0(s32) = COPY %r0
+ %1(s32) = COPY %r1
+ %2(s32) = G_XOR %0, %1
+ %r0 = COPY %2(s32)
+ BX_RET 14, _, implicit %r0
+
+...
+---
name: test_loads
# CHECK-LABEL: name: test_loads
legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
-# CHECK: - { id: 6, class: fprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
+# CHECK: - { id: 6, class: fprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -456,13 +538,13 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
-# CHECK: - { id: 6, class: fprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
+# CHECK: - { id: 6, class: fprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -498,11 +580,11 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -531,9 +613,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -556,7 +638,7 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
body: |
@@ -572,8 +654,8 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -593,8 +675,8 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -614,8 +696,8 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -635,9 +717,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: fprb }
-# CHECK: - { id: 1, class: fprb }
-# CHECK: - { id: 2, class: fprb }
+# CHECK: - { id: 0, class: fprb, preferred-register: '' }
+# CHECK: - { id: 1, class: fprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -661,9 +743,9 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: fprb }
-# CHECK: - { id: 1, class: fprb }
-# CHECK: - { id: 2, class: fprb }
+# CHECK: - { id: 0, class: fprb, preferred-register: '' }
+# CHECK: - { id: 1, class: fprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -687,11 +769,11 @@ legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: fprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -705,9 +787,8 @@ body: |
%0(s32) = COPY %r0
%1(s32) = COPY %r1
- %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
- %3(s32) = G_EXTRACT %2(s64), 0
- %4(s32) = G_EXTRACT %2(s64), 32
+ %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+ %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
%r0 = COPY %3(s32)
%r1 = COPY %4(s32)
BX_RET 14, _, implicit %r0, implicit %r1
diff --git a/test/CodeGen/ARM/clang-section.ll b/test/CodeGen/ARM/clang-section.ll
new file mode 100644
index 000000000000..343f0e721d7f
--- /dev/null
+++ b/test/CodeGen/ARM/clang-section.ll
@@ -0,0 +1,140 @@
+;RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s
+;Test that global variables and functions are assigned to correct sections.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-arm-none-eabi"
+
+@a = global i32 0, align 4 #0
+@b = global i32 1, align 4 #0
+@c = global [4 x i32] zeroinitializer, align 4 #0
+@d = global [5 x i16] zeroinitializer, align 2 #0
+@e = global [6 x i16] [i16 0, i16 0, i16 1, i16 0, i16 0, i16 0], align 2 #0
+@f = constant i32 2, align 4 #0
+@h = global i32 0, align 4 #1
+@i = global i32 0, align 4 #2
+@j = constant i32 4, align 4 #2
+@k = global i32 0, align 4 #2
+@_ZZ3gooE7lstat_h = internal global i32 0, align 4 #2
+@_ZL1g = internal global [2 x i32] zeroinitializer, align 4 #0
+@l = global i32 5, align 4 #3
+@m = constant i32 6, align 4 #3
+@n = global i32 0, align 4
+@o = global i32 6, align 4
+@p = constant i32 7, align 4
+
+; Function Attrs: noinline nounwind
+define i32 @foo() #4 {
+entry:
+ %0 = load i32, i32* @b, align 4
+ ret i32 %0
+}
+
+; Function Attrs: noinline
+define i32 @goo() #5 {
+entry:
+ %call = call i32 @zoo(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZL1g, i32 0, i32 0), i32* @_ZZ3gooE7lstat_h)
+ ret i32 %call
+}
+
+declare i32 @zoo(i32*, i32*) #6
+
+; Function Attrs: noinline nounwind
+define i32 @hoo() #7 {
+entry:
+ %0 = load i32, i32* @b, align 4
+ ret i32 %0
+}
+
+attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" }
+attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" }
+attributes #4 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.1" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.2" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"static_rwdata", i32 1}
+!2 = !{i32 1, !"enumsize_buildattr", i32 2}
+!3 = !{i32 1, !"armlib_unavailable", i32 0}
+
+;CHECK: .section my_text.1,"ax",%progbits
+;CHECK: .type foo,%function
+;CHECK: foo:
+
+;CHECK: .section my_text.2,"ax",%progbits
+;CHECK: .type goo,%function
+;CHECK: goo:
+
+;CHECK: .text
+;CHECK: .type hoo,%function
+;CHECK: hoo:
+
+;CHECK: .type a,%object
+;CHECK: .section my_bss.1,"aw",%nobits
+;CHECK: a:
+
+;CHECK: .type b,%object
+;CHECK: .section my_data.1,"aw",%progbits
+;CHECK: b:
+
+;CHECK: .type c,%object
+;CHECK: .section my_bss.1,"aw",%nobits
+;CHECK: c:
+
+;CHECK: .type d,%object
+;CHECK: d:
+
+;CHECK: .type e,%object
+;CHECK: .section my_data.1,"aw",%progbits
+;CHECK: e:
+
+;CHECK: .type f,%object
+;CHECK: .section my_rodata.1,"a",%progbits
+;CHECK: f:
+
+;CHECK: .type h,%object
+;CHECK: .bss
+;CHECK: h:
+
+;CHECK: .type i,%object
+;CHECK: .section my_bss.2,"aw",%nobits
+;CHECK: i:
+
+;CHECK: .type j,%object
+;CHECK: .section my_rodata.1,"a",%progbits
+;CHECK: j:
+
+;CHECK: .type k,%object
+;CHECK: .section my_bss.2,"aw",%nobits
+;CHECK: k:
+
+;CHECK: .type _ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h
+;CHECK: _ZZ3gooE7lstat_h:
+
+;CHECK: .type _ZL1g,%object
+;CHECK: .section my_bss.1,"aw",%nobits
+;CHECK: _ZL1g:
+
+;CHECK: .type l,%object
+;CHECK: .section my_data.2,"aw",%progbits
+;CHECK: l:
+
+;CHECK: .type m,%object
+;CHECK: .section my_rodata.2,"a",%progbits
+;CHECK: m:
+
+;CHECK: .type n,%object
+;CHECK: .bss
+;CHECK: n:
+
+;CHECK: .type o,%object
+;CHECK: .data
+;CHECK: o:
+
+;CHECK: .type p,%object
+;CHECK: .section .rodata,"a",%progbits
+;CHECK: p:
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
index a9223e1e2a99..5f914323861a 100644
--- a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
+++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
; Check latencies of vmul/vfma accumulate chains.
define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
@@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
; > VMULS read-advanced latency to VMLAS = 0
; CHECK-SAME: Latency=0
-; CHECK: VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST: VFMAS
; > VMLAS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
@@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
; > VMLAS read-advanced latency to the next VMLAS = 4
; CHECK-SAME: Latency=4
-; CHECK: VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST: VFMAS
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: data
@@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
; VMULfd read-advanced latency to VMLAfd = 0
; CHECK-SAME: Latency=0
-; CHECK: VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST: VFMAfd
; > VMLAfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
@@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
; > VMLAfd read-advanced latency to the next VMLAfd = 4
; CHECK-SAME: Latency=4
-; CHECK: VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST: VFMAfd
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: data
@@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
ret <2 x float> %add2
}
+define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test3:BB#0
+
+; CHECK: VMULS
+; > VMULS common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; > VMULS read-advanced latency to VMLSS = 0
+; CHECK-SAME: Latency=0
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST: VFMSS
+; > VMLSS common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSS read-advanced latency to the next VMLSS = 4
+; CHECK-SAME: Latency=4
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST: VFMSS
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSS not-optimized latency to VMOVRS = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULS, VMLSS, VMLSS
+ %mul1 = fmul float %f1, %f2
+ %mul2 = fmul float %f3, %f4
+ %mul3 = fmul float %f5, %f6
+ %sub1 = fsub float %mul1, %mul2
+ %sub2 = fsub float %sub1, %mul3
+ ret float %sub2
+}
+
+; ASIMD form
+define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test4:BB#0
+
+; CHECK: VMULfd
+; > VMULfd common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; VMULfd read-advanced latency to VMLSfd = 0
+; CHECK-SAME: Latency=0
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST: VFMSfd
+; > VMLSfd common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSfd read-advanced latency to the next VMLSfd = 4
+; CHECK-SAME: Latency=4
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST: VFMSfd
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSfd not-optimized latency to VMOVRRD = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULfd, VMLSfd, VMLSfd
+ %mul1 = fmul <2 x float> %f1, %f2
+ %mul2 = fmul <2 x float> %f3, %f4
+ %mul3 = fmul <2 x float> %f5, %f6
+ %sub1 = fsub <2 x float> %mul1, %mul2
+ %sub2 = fsub <2 x float> %sub1, %mul3
+ ret <2 x float> %sub2
+}
diff --git a/test/CodeGen/ARM/invalidated-save-point.ll b/test/CodeGen/ARM/invalidated-save-point.ll
index 0ff153b6799d..bb602308a179 100644
--- a/test/CodeGen/ARM/invalidated-save-point.ll
+++ b/test/CodeGen/ARM/invalidated-save-point.ll
@@ -4,8 +4,8 @@
; this point. Notably, if it isn't is will be invalid and reference a
; deleted block (%bb.-1.if.end)
-; CHECK-NOT: savePoint:
-; CHECK-NOT: restorePoint:
+; CHECK: savePoint: ''
+; CHECK: restorePoint: ''
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7"
diff --git a/test/CodeGen/Generic/llc-start-stop.ll b/test/CodeGen/Generic/llc-start-stop.ll
index 7508f94c50a9..49407fbb2d88 100644
--- a/test/CodeGen/Generic/llc-start-stop.ll
+++ b/test/CodeGen/Generic/llc-start-stop.ll
@@ -10,12 +10,12 @@
; STOP-BEFORE-NOT: Loop Strength Reduction
; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER
-; START-AFTER: -machine-branch-prob -pre-isel-intrinsic-lowering
+; START-AFTER: -machine-branch-prob -gc-lowering
; START-AFTER: FunctionPass Manager
; START-AFTER-NEXT: Lower Garbage Collection Instructions
; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE
-; START-BEFORE: -machine-branch-prob -pre-isel-intrinsic-lowering
+; START-BEFORE: -machine-branch-prob -domtree
; START-BEFORE: FunctionPass Manager
; START-BEFORE: Loop Strength Reduction
; START-BEFORE-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Hexagon/common-gep-inbounds.ll b/test/CodeGen/Hexagon/common-gep-inbounds.ll
new file mode 100644
index 000000000000..a8b75725a0b8
--- /dev/null
+++ b/test/CodeGen/Hexagon/common-gep-inbounds.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=hexagon -debug-only=commgep 2>&1 < %s | FileCheck %s
+; REQUIRES: asserts
+
+; We should generate new GEPs with "inbounds" flag.
+; CHECK: new GEP:{{.*}}inbounds
+; CHECK: new GEP:{{.*}}inbounds
+
+target triple = "hexagon"
+
+%struct.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define i16 @TraceBack() #0 {
+entry:
+ %p = getelementptr inbounds %struct.0, %struct.0* undef, i32 0, i32 0
+ %a = load i16, i16* %p
+ ret i16 %a
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" }
diff --git a/test/CodeGen/Hexagon/mux-undef.ll b/test/CodeGen/Hexagon/mux-undef.ll
new file mode 100644
index 000000000000..3780a329b1eb
--- /dev/null
+++ b/test/CodeGen/Hexagon/mux-undef.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+;
+; Make sure this test compiles successfully.
+; CHECK: jumpr r31
+
+target triple = "hexagon--elf"
+
+; Function Attrs: nounwind
+define i32 @fred() #0 {
+b0:
+ call void @foo() #0
+ br label %b1
+
+b1: ; preds = %b0
+ br i1 undef, label %b2, label %b3
+
+b2: ; preds = %b1
+ br label %b3
+
+b3: ; preds = %b2, %b1
+ %v4 = phi i32 [ 1, %b1 ], [ 2, %b2 ]
+ ret i32 %v4
+}
+
+declare void @foo() #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" }
diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
index d63c2ef6e871..af785bcb10a9 100644
--- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
+++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
@@ -17,6 +17,5 @@ body: |
liveins: %w0
; ERR: generic virtual registers must have a type
; ERR-NEXT: %0
- ; ERR: Unable to initialize machine function
%0 = G_ADD i32 %w0, %w0
...
diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
index e331179773d6..f177b91da559 100644
--- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
+++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
@@ -18,6 +18,5 @@ body: |
liveins: %w0
; ERR: generic virtual registers must have a type
; ERR-NEXT: %0
- ; ERR: Unable to initialize machine function
%0 = G_ADD i32 %w0, %w0
...
diff --git a/test/CodeGen/MIR/AArch64/register-operand-bank.mir b/test/CodeGen/MIR/AArch64/register-operand-bank.mir
index d48495167f15..d2f99933a35a 100644
--- a/test/CodeGen/MIR/AArch64/register-operand-bank.mir
+++ b/test/CodeGen/MIR/AArch64/register-operand-bank.mir
@@ -7,8 +7,8 @@
---
# CHECK-LABEL: name: func
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: fpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr, preferred-register: '' }
name: func
body: |
bb.0:
diff --git a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
index fc0c4ce8c07f..cfb3aef5fb0f 100644
--- a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
+++ b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
@@ -25,7 +25,9 @@ frameInfo:
maxAlignment: 8
# CHECK-LABEL: stack_local
# CHECK: stack:
-# CHECK-NEXT: { id: 0, name: local_var, offset: 0, size: 8, alignment: 8, local-offset: -8 }
+# CHECK-NEXT: { id: 0, name: local_var, type: default, offset: 0, size: 8, alignment: 8,
+# CHECK-NEXT: callee-saved-register: '', local-offset: -8, di-variable: '', di-expression: '',
+# CHECK-NEXT: di-location: '' }
stack:
- { id: 0,name: local_var,offset: 0,size: 8,alignment: 8, local-offset: -8 }
body: |
diff --git a/test/CodeGen/MIR/Generic/frame-info.mir b/test/CodeGen/MIR/Generic/frame-info.mir
index 157eb99e149e..a467bfa3a1af 100644
--- a/test/CodeGen/MIR/Generic/frame-info.mir
+++ b/test/CodeGen/MIR/Generic/frame-info.mir
@@ -36,9 +36,13 @@ tracksRegLiveness: true
# CHECK-NEXT: maxAlignment:
# CHECK-NEXT: adjustsStack: false
# CHECK-NEXT: hasCalls: false
+# CHECK-NEXT: stackProtector: ''
+# CHECK-NEXT: maxCallFrameSize:
# CHECK-NEXT: hasOpaqueSPAdjustment: false
# CHECK-NEXT: hasVAStart: false
# CHECK-NEXT: hasMustTailInVarArgFunc: false
+# CHECK-NEXT: savePoint: ''
+# CHECK-NEXT: restorePoint: ''
# CHECK: body
frameInfo:
maxAlignment: 4
@@ -61,6 +65,7 @@ tracksRegLiveness: true
# CHECK-NEXT: maxAlignment:
# CHECK-NEXT: adjustsStack: true
# CHECK-NEXT: hasCalls: true
+# CHECK-NEXT: stackProtector: ''
# CHECK-NEXT: maxCallFrameSize: 4
# CHECK-NEXT: hasOpaqueSPAdjustment: true
# CHECK-NEXT: hasVAStart: true
diff --git a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir b/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
deleted file mode 100644
index f3a834801671..000000000000
--- a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
-# This test verifies that an error is reported when a MIR file has some
-# function but is missing a corresponding machine function.
-
-# CHECK: no machine function information for function 'foo' in the MIR file
-
---- |
-
- define i32 @foo() {
- ret i32 0
- }
-
-...
diff --git a/test/CodeGen/MIR/X86/callee-saved-info.mir b/test/CodeGen/MIR/X86/callee-saved-info.mir
index 883f6fdb0d22..6920611019b9 100644
--- a/test/CodeGen/MIR/X86/callee-saved-info.mir
+++ b/test/CodeGen/MIR/X86/callee-saved-info.mir
@@ -50,12 +50,12 @@ frameInfo:
adjustsStack: true
hasCalls: true
# CHECK: fixedStack:
-# CHECK-NEXT: , callee-saved-register: '%rbx' }
+# CHECK: , callee-saved-register: '%rbx' }
fixedStack:
- { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' }
# CHECK: stack:
# CHECK-NEXT: - { id: 0
-# CHECK-NEXT: , callee-saved-register: '%edi' }
+# CHECK: callee-saved-register: '%edi'
stack:
- { id: 0, name: b, offset: -20, size: 4, alignment: 4 }
- { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi' }
diff --git a/test/CodeGen/MIR/X86/empty0.mir b/test/CodeGen/MIR/X86/empty0.mir
new file mode 100644
index 000000000000..4431af7c6a99
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty0.mir
@@ -0,0 +1,6 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/empty1.mir b/test/CodeGen/MIR/X86/empty1.mir
new file mode 100644
index 000000000000..d80b0cd30231
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty1.mir
@@ -0,0 +1,8 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+--- |
+...
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/empty2.mir b/test/CodeGen/MIR/X86/empty2.mir
new file mode 100644
index 000000000000..7495807cd4d6
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty2.mir
@@ -0,0 +1,8 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+---
+...
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/fixed-stack-objects.mir b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
index a7ecac841a64..c87cb0b49f93 100644
--- a/test/CodeGen/MIR/X86/fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
@@ -20,7 +20,7 @@ frameInfo:
stackSize: 4
maxAlignment: 4
# CHECK: fixedStack:
-# CHECK-NEXT: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+# CHECK-NEXT: - { id: 0, type: default, offset: 0, size: 4, alignment: 4, isImmutable: true,
fixedStack:
- { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
stack:
diff --git a/test/CodeGen/MIR/X86/generic-instr-type.mir b/test/CodeGen/MIR/X86/generic-instr-type.mir
index b9e47cdf6192..78951de70a3c 100644
--- a/test/CodeGen/MIR/X86/generic-instr-type.mir
+++ b/test/CodeGen/MIR/X86/generic-instr-type.mir
@@ -19,11 +19,11 @@
---
name: test_vregs
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
-# CHECK-NEXT: - { id: 3, class: _ }
-# CHECK-NEXT: - { id: 4, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/MIR/X86/inline-asm.mir b/test/CodeGen/MIR/X86/inline-asm.mir
new file mode 100644
index 000000000000..be96517144b0
--- /dev/null
+++ b/test/CodeGen/MIR/X86/inline-asm.mir
@@ -0,0 +1,12 @@
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass none | FileCheck %s
+---
+# Avoid crash/assert when using an emptystring in an INLINEASM.
+# CHECK-LABEL: name: emptystring
+# CHECK: bb.0:
+# CHECK: INLINEASM $"", 1
+# CHECK: RET 0
+name: emptystring
+body: |
+ bb.0:
+ INLINEASM $"", 1
+ RET 0
diff --git a/test/CodeGen/MIR/X86/register-operand-class.mir b/test/CodeGen/MIR/X86/register-operand-class.mir
index 63019daad7a1..abdcda2a077b 100644
--- a/test/CodeGen/MIR/X86/register-operand-class.mir
+++ b/test/CodeGen/MIR/X86/register-operand-class.mir
@@ -1,4 +1,4 @@
-# RUN: llc -o - %s -march=x86-64 -run-pass none | FileCheck %s
+# RUN: llc -o - %s -march=x86-64 -run-pass none | FileCheck %s
# Test various aspects of register class specification on machine operands.
--- |
define void @func() { ret void }
@@ -6,11 +6,11 @@
---
# CHECK-LABEL: name: func
# CHECK: registers:
-# CHECK: - { id: 0, class: gr32 }
-# CHECK: - { id: 1, class: gr64 }
-# CHECK: - { id: 2, class: gr32 }
-# CHECK: - { id: 3, class: gr16 }
-# CHECK: - { id: 4, class: _ }
+# CHECK: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK: - { id: 1, class: gr64, preferred-register: '' }
+# CHECK: - { id: 2, class: gr32, preferred-register: '' }
+# CHECK: - { id: 3, class: gr16, preferred-register: '' }
+# CHECK: - { id: 4, class: _, preferred-register: '' }
name: func
body: |
bb.0:
diff --git a/test/CodeGen/MIR/X86/roundtrip.mir b/test/CodeGen/MIR/X86/roundtrip.mir
new file mode 100644
index 000000000000..c697f7306041
--- /dev/null
+++ b/test/CodeGen/MIR/X86/roundtrip.mir
@@ -0,0 +1,20 @@
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=none | llc -o - -x mir - -mtriple=x86_64-- -run-pass=none | FileCheck %s
+---
+# CHECK-LABEL: name: func0
+# CHECK: registers:
+# CHECK: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK: body: |
+# CHECK: bb.0:
+# CHECK: %0 = MOV32r0 implicit-def %eflags
+# CHECK: dead %1 = COPY %0
+# CHECK: MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4)
+# CHECK: RETQ undef %eax
+name: func0
+body: |
+ bb.0:
+ %0 : gr32 = MOV32r0 implicit-def %eflags
+ dead %1 : gr32 = COPY %0
+ MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4)
+ RETQ undef %eax
+...
diff --git a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
index 27ca266f7794..310fa6a1c53b 100644
--- a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
+++ b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
@@ -15,7 +15,7 @@
name: test
tracksRegLiveness: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '%esi' }
# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '%edi' }
registers:
diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
index 1771d6fafcae..d3c422362848 100644
--- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
@@ -19,7 +19,7 @@ name: test
frameInfo:
maxAlignment: 4
# CHECK: fixedStack:
-# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 }
+# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4, callee-saved-register: '' }
fixedStack:
- { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 }
stack:
diff --git a/test/CodeGen/MIR/X86/stack-object-debug-info.mir b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
index a893b0836a62..445d1bd3f1fd 100644
--- a/test/CodeGen/MIR/X86/stack-object-debug-info.mir
+++ b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
@@ -51,8 +51,9 @@ frameInfo:
maxAlignment: 16
# CHECK-LABEL: foo
# CHECK: stack:
-# CHECK: - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
-# CHECK-NEXT: di-expression: '!10', di-location: '!11' }
+# CHECK: - { id: 0, name: y.i, type: default, offset: 0, size: 256, alignment: 16,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '!4', di-expression: '!10',
+# CHECK-NEXT: di-location: '!11' }
stack:
- { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
di-expression: '!7', di-location: '!8' }
diff --git a/test/CodeGen/MIR/X86/stack-objects.mir b/test/CodeGen/MIR/X86/stack-objects.mir
index 08b9ec0b4347..608202ec5dcc 100644
--- a/test/CodeGen/MIR/X86/stack-objects.mir
+++ b/test/CodeGen/MIR/X86/stack-objects.mir
@@ -21,9 +21,12 @@ name: test
frameInfo:
maxAlignment: 8
# CHECK: stack:
-# CHECK-NEXT: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
-# CHECK-NEXT: - { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
-# CHECK-NEXT: - { id: 2, type: spill-slot, offset: -32, size: 4, alignment: 4 }
+# CHECK-NEXT: - { id: 0, name: b, type: default, offset: -12, size: 4, alignment: 4,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 1, name: x, type: default, offset: -24, size: 8, alignment: 8,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
stack:
- { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
- { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
index 5e7d99352e57..95efd977d9c6 100644
--- a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
@@ -24,9 +24,11 @@ frameInfo:
maxAlignment: 8
adjustsStack: true
# CHECK: stack:
-# CHECK-NEXT: - { id: 0, offset: -20, size: 4, alignment: 4 }
-# CHECK-NEXT: - { id: 1, offset: -32, size: 8, alignment: 8 }
-# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 }
+# CHECK-NEXT: - { id: 0, name: '', type: default, offset: -20, size: 4, alignment: 4,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 1, name: '', type: default, offset: -32, size: 8, alignment: 8,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1,
stack:
- { id: 0, offset: -20, size: 4, alignment: 4 }
- { id: 1, offset: -32, size: 8, alignment: 8 }
diff --git a/test/CodeGen/MIR/X86/virtual-registers.mir b/test/CodeGen/MIR/X86/virtual-registers.mir
index e63bcf4acdd1..0d181f895aa9 100644
--- a/test/CodeGen/MIR/X86/virtual-registers.mir
+++ b/test/CodeGen/MIR/X86/virtual-registers.mir
@@ -33,9 +33,9 @@
name: bar
tracksRegLiveness: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gr32 }
- { id: 1, class: gr32 }
@@ -67,9 +67,9 @@ name: foo
tracksRegLiveness: true
# CHECK: name: foo
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 2, class: gr32 }
- { id: 0, class: gr32 }
diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll
index 3acfa372a905..b266b5e05e21 100644
--- a/test/CodeGen/Mips/biggot.ll
+++ b/test/CodeGen/Mips/biggot.ll
@@ -1,6 +1,9 @@
; RUN: llc -march=mipsel -mxgot -relocation-model=pic < %s | FileCheck %s -check-prefix=O32
; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic < %s | \
; RUN: FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -mxgot -relocation-model=pic -fast-isel < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic -fast-isel < %s | \
+; RUN: FileCheck %s -check-prefix=N64
@v0 = external global i32
diff --git a/test/CodeGen/Mips/cconv/vector.ll b/test/CodeGen/Mips/cconv/vector.ll
new file mode 100644
index 000000000000..5a88d064fe73
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/vector.ll
@@ -0,0 +1,1657 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+
+
+
+; Test that vector types are passed through the integer register set whether or
+; not MSA is enabled. This is an ABI requirement for MIPS. For GCC compatibility
+; we need to handle any power of 2 number of elements. We will test this
+; exhaustively for combinations up to MSA register (128 bits) size.
+
+; First set of tests are for argument passing.
+
+define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
+; ALL-LABEL: i8_2:
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32EL: addu $1, $4, $5
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <2 x i8> %a, %b
+ ret <2 x i8> %1
+}
+
+; Test that vectors spilled to the outgoing argument area have the expected
+; offset from $sp.
+
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
+ <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+entry:
+
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
+
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
+
+; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 17($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
+
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
+
+; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
+
+ %0 = add <2 x i8> %a, %b
+ %1 = add <2 x i8> %0, %c
+ %2 = add <2 x i8> %1, %d
+ %3 = add <2 x i8> %2, %e
+ %4 = add <2 x i8> %3, %f
+ %5 = add <2 x i8> %4, %g
+ ret <2 x i8> %5
+}
+
+define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
+; ALL-LABEL: i8_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+ %1 = add <4 x i8> %a, %b
+ ret <4 x i8> %1
+}
+
+define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
+; ALL-LABEL: i8_8:
+; MIPS32-NOT: lw
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <8 x i8> %a, %b
+ ret <8 x i8> %1
+}
+
+define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
+; ALL-LABEL: i8_16:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <16 x i8> %a, %b
+
+ ret <16 x i8> %1
+}
+
+define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: i16_2:
+; MIPS32: addu $[[R0:[0-9]+]], $4, $5
+; MIPS32: andi $[[R1:[0-9]+]], $[[R0]], 65535
+; MIPS32: srl $[[R2:[0-9]+]], $5, 16
+; MIPS32: srl $[[R3:[0-9]+]], $4, 16
+; MIPS32: addu $[[R4:[0-9]+]], $[[R3]], $[[R2]]
+; MIPS32: sll $2, $[[R4]], 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+ %1 = add <2 x i16> %a, %b
+ ret <2 x i16> %1
+}
+
+define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
+; ALL-LABEL: i16_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <4 x i16> %a, %b
+ ret <4 x i16> %1
+}
+
+define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
+; ALL-LABEL: i16_8:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <8 x i16> %a, %b
+ ret <8 x i16> %1
+}
+
+define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
+; ALL-LABEL: i32_2:
+; MIPS32-DAG: addu $2, $4, $6
+; MIPS32-DAG: addu $3, $5, $7
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <2 x i32> %a, %b
+
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: i32_4:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $7, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+ %1 = add <4 x i32> %a, %b
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: i64_2:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: daddu $2, $4, $6
+; MIPS64-DAG: daddu $3, $5, $7
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <2 x i64> %a, %b
+ ret <2 x i64> %1
+}
+
+; The MIPS vector ABI treats vectors of floats differently to vectors of
+; integers.
+
+; For arguments, floating point vectors are bitcasted to integer vectors whose
+; elements are of GPR width and where the element count is deduced from
+; the length of the floating point vector divided by the size of the GPRs.
+
+; For returns, integer vectors are passed via the GPR register set, but
+; floating point vectors are returned via a hidden sret pointer.
+
+; For testing purposes we skip returning values here and test them below
+; instead.
+@float_res_v2f32 = external global <2 x float>
+
+define void @float_2(<2 x float> %a, <2 x float> %b) {
+; ALL-LABEL: float_2:
+; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
+; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
+; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
+; MIPS32: swc1 $f[[F2]]
+; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
+; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
+; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
+; MIPS32: swc1 $f[[F5]]
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = fadd <2 x float> %a, %b
+ store <2 x float> %1, <2 x float> * @float_res_v2f32
+ ret void
+}
+
+@float_res_v4f32 = external global <4 x float>
+
+; For MSA this case is suboptimal, the 4 loads can be combined into a single
+; ld.w.
+
+define void @float_4(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: float_4:
+; MIPS32-DAG: mtc1 $4
+; MIPS32-DAG: mtc1 $5
+; MIPS32-DAG: mtc1 $6
+; MIPS32-DAG: mtc1 $7
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
+; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
+; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = fadd <4 x float> %a, %b
+ store <4 x float> %1, <4 x float> * @float_res_v4f32
+ ret void
+}
+
+@double_v2f64 = external global <2 x double>
+
+define void @double_2(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: double_2:
+; MIPS32-DAG: sw $7
+; MIPS32-DAG: sw $6
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32: add.d
+; MIPS32-DAG: sw $5
+; MIPS32-DAG: sw $4
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32: add.d
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
+; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
+; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
+; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = fadd <2 x double> %a, %b
+ store <2 x double> %1, <2 x double> * @double_v2f64
+ ret void
+}
+
+; Return value testing.
+; Integer vectors are returned in $2, $3, $4, $5 for O32, $2, $3 for N32/N64
+; Floating point vectors are returned through a hidden sret pointer.
+
+@gv2i8 = global <2 x i8> <i8 1, i8 2>
+@gv4i8 = global <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+@gv8i8 = global <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+@gv16i8 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+@gv2i16 = global <2 x i16> <i16 1, i16 2>
+@gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+@gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+
+@gv2i32 = global <2 x i32> <i32 0, i32 1>
+@gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+@gv2i64 = global <2 x i64> <i64 0, i64 1>
+
+define <2 x i8> @ret_2_i8() {
+; ALL-LABEL: ret_2_i8:
+; MIPS32-DAG: lhu $2
+; MIPS32R5-DAG: lhu $2
+
+; FIXME: why is this lh instead of lhu on mips64?
+
+; MIPS64-DAG: lh $2
+; MIPS64R5-DAG: lh $2
+ %1 = load <2 x i8>, <2 x i8> * @gv2i8
+ ret <2 x i8> %1
+}
+
+define <4 x i8> @ret_4_i8() {
+; ALL-LABEL: ret_4_i8:
+; MIPS32-DAG: lw $2
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG: lw $2
+; MIPS64R5-DAG: lw $2
+
+ %1 = load <4 x i8>, <4 x i8> * @gv4i8
+ ret <4 x i8> %1
+}
+
+define <8 x i8> @ret_8_i8() {
+; ALL-LABEL: ret_8_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+ %1 = load <8 x i8>, <8 x i8> * @gv8i8
+ ret <8 x i8> %1
+}
+
+define <16 x i8> @ret_16_i8() {
+; ALL-LABEL: ret_16_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+ %1 = load <16 x i8>, <16 x i8> * @gv16i8
+ ret <16 x i8> %1
+}
+
+define <2 x i16> @ret_2_i16() {
+; ALL-LABEL: ret_2_i16:
+; MIPS32-DAG: lw $2
+
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG: lw $2
+
+; MIPS64R5-DAG: lw $2
+ %1 = load <2 x i16>, <2 x i16> * @gv2i16
+ ret <2 x i16> %1
+}
+
+define <4 x i16> @ret_4_i16() {
+; ALL-LABEL: ret_4_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+ %1 = load <4 x i16>, <4 x i16> * @gv4i16
+ ret <4 x i16> %1
+}
+
+define <8 x i16> @ret_8_i16() {
+; ALL-LABEL: ret_8_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+ %1 = load <8 x i16>, <8 x i16> * @gv8i16
+ ret <8 x i16> %1
+}
+
+define <2 x i32> @ret_2_i32() {
+; ALL-LABEL: ret_2_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+
+ %1 = load <2 x i32>, <2 x i32> * @gv2i32
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @ret_4_i32() {
+; ALL-LABEL: ret_4_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+ %1 = load <4 x i32>, <4 x i32> * @gv4i32
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @ret_2_i64() {
+; ALL-LABEL: ret_2_i64:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+ %1 = load <2 x i64>, <2 x i64> * @gv2i64
+ ret <2 x i64> %1
+}
+
+@gv2f32 = global <2 x float> <float 0.0, float 0.0>
+@gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+
+define <2 x float> @ret_float_2() {
+entry:
+; ALL-LABEL: ret_float_2:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS64: ld $2
+
+; MIPS64R5: ld $2
+
+ %0 = load <2 x float>, <2 x float> * @gv2f32
+ ret <2 x float> %0
+}
+
+define <4 x float> @ret_float_4() {
+entry:
+; ALL-LABEL: ret_float_4:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+ %0 = load <4 x float>, <4 x float> * @gv4f32
+ ret <4 x float> %0
+}
+
+@gv2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define <2 x double> @ret_double_2() {
+entry:
+; ALL-LABEL: ret_double_2:
+
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
+
+; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+ %0 = load <2 x double>, <2 x double> * @gv2f64
+ ret <2 x double> %0
+}
+
+; Test argument lowering and call result lowering.
+
+define void @call_i8_2() {
+entry:
+; ALL-LABEL: call_i8_2:
+; MIPS32EB-DAG: addiu $4
+; MIPS32EB-DAG: addiu $5
+; MIPS32-NOT: addiu $6
+; MIPS32-NOT: addiu $7
+
+; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
+
+; MIPS32R5: jal
+; MIPS32R5: sw $2, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
+; MIPS32R5-DAG: sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
+
+; MIPS64EB: daddiu $4, $zero, 1543
+; MIPS64EB: daddiu $5, $zero, 3080
+
+; MIPS64EL: daddiu $4, $zero, 1798
+; MIPS64EL: daddiu $5, $zero, 2060
+
+; MIPS64R5-DAG: lh $4
+; MIPS64R5-DAG: lh $5
+
+; MIPS32: jal i8_2
+; MIPS64: jalr $25
+
+; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
+; MIPS32EB-DAG: sb $[[R0]]
+; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
+; MIPS32EB-DAG: sb $[[R1]]
+
+; MIPS32EL: sb $2
+; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
+; MIPS32EL: sb $[[R0]]
+
+; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
+; MIPS64EB: sb $[[R4]]
+; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
+; MIPS64EB: sb $[[R5]]
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
+; MIPS64EL: sb $[[R6]]
+; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
+; MIPS64EL: sb $[[R7]]
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
+ store <2 x i8> %0, <2 x i8> * @gv2i8
+ ret void
+}
+
+define void @call_i8_4() {
+entry:
+; ALL-LABEL: call_i8_4:
+; MIPS32: ori $4
+; MIPS32: ori $5
+; MIPS32-NOT: ori $6
+; MIPS32-NOT: ori $7
+
+; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5: lw $4
+; MIPS64R5: lw $5
+
+; MIPS32: jal i8_4
+; MIPS64: jalr $25
+
+; MIPS32: sw $2
+
+; MIPS32R5-DAG: sw $2
+
+; MIPS64: sw $2
+; MIPS64R5: sw $2
+
+ %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
+ store <4 x i8> %0, <4 x i8> * @gv4i8
+ ret void
+}
+
+define void @call_i8_8() {
+entry:
+; ALL-LABEL: call_i8_8:
+
+; MIPS32: ori $6
+; MIPS32: ori $4
+; MIPS32: move $5
+; MIPS32: move $7
+
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: move $5
+; MIPS32R5-DAG: move $7
+
+; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
+; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
+
+; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
+; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
+
+; MIPS32: jal i8_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2
+; MIPS32-DAG: sw $3
+
+; MIPS32R5-DAG: sw $2
+; MIPS32R5-DAG: sw $3
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+ %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
+ store <8 x i8> %0, <8 x i8> * @gv8i8
+ ret void
+}
+
+define void @calli8_16() {
+entry:
+; ALL-LABEL: calli8_16:
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: move $5, ${{[0-9]+}}
+; MIPS32: move $6, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $7
+
+; MIPS64R5-DAG: copy_s.d $4
+; MIPS64R5-DAG: copy_s.d $5
+; MIPS64R5-DAG: copy_s.d $6
+; MIPS64R5-DAG: copy_s.d $7
+
+; MIPS32: jal i8_16
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+ %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
+ store <16 x i8> %0, <16 x i8> * @gv16i8
+ ret void
+}
+
+define void @calli16_2() {
+entry:
+; ALL-LABEL: calli16_2:
+
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+
+; MIPS32R5-DAG: lw $4
+; MIPS32R5-DAG: lw $5
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5-DAG: lw $4
+; MIPS64R5-DAG: lw $5
+
+; MIPS32: jal i16_2
+; MIPS64: jalr $25
+
+; MIPS32: sw $2, %lo(gv2i16)
+
+; MIPS32R5: sw $2, %lo(gv2i16)
+
+; MIPS64: sw $2
+
+; MIPS64R5: sw $2
+
+ %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
+ store <2 x i16> %0, <2 x i16> * @gv2i16
+ ret void
+}
+
+define void @calli16_4() {
+entry:
+; ALL-LABEL: calli16_4:
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+; MIPS32-DAG: ori $6
+; MIPS32-DAG: move $7
+
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: ori $5
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: move $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i16_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+ %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
+ store <4 x i16> %0, <4 x i16> * @gv4i16
+ ret void
+}
+
+define void @calli16_8() {
+entry:
+; ALL-LABEL: calli16_8:
+
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: move $6, ${{[0-9]+}}
+; MIPS32-DAG: move $7, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $7
+; MIPS64-DAG: move $5
+; MIPS64-DAG: move $6
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i16_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64: sd $3
+; MIPS64: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
+
+ %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
+ store <8 x i16> %0, <8 x i16> * @gv8i16
+ ret void
+}
+
+define void @calli32_2() {
+entry:
+; ALL-LABEL: calli32_2:
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i32_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
+ store <2 x i32> %0, <2 x i32> * @gv2i32
+ ret void
+}
+
+define void @calli32_4() {
+entry:
+; ALL-LABEL: calli32_4:
+
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: move $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i32_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+ %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
+ store <4 x i32> %0, <4 x i32> * @gv4i32
+ ret void
+}
+
+define void @calli64_2() {
+entry:
+; ALL-LABEL: calli64_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+; MIPS64: daddiu $6
+; MIPS64: daddiu $7
+
+; MIPS64R5: daddiu $4
+; MIPS64R5: daddiu $5
+; MIPS64R5: daddiu $6
+; MIPS64R5: daddiu $7
+
+; MIPS32: jal i64_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+ %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
+ store <2 x i64> %0, <2 x i64> * @gv2i64
+ ret void
+}
+
+declare <2 x float> @float2_extern(<2 x float>, <2 x float>)
+declare <4 x float> @float4_extern(<4 x float>, <4 x float>)
+declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
+
+define void @callfloat_2() {
+entry:
+; ALL-LABEL: callfloat_2:
+
+; MIPS32-DAG: addiu $4, $sp, 24
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: lui $7
+
+; MIPS32R5-DAG: addiu $4, $sp, 24
+; MIPS32R5-DAG: addiu $6, $zero, 0
+; MIPS32R5-DAG: lui $7
+
+; MIPS64: dsll $4
+; MIPS64: dsll $5
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+
+; MIPS32: jal float2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
+ store <2 x float> %0, <2 x float> * @gv2f32
+ ret void
+}
+
+define void @callfloat_4() {
+entry:
+; ALL-LABEL: callfloat_4:
+
+; MIPS32: sw ${{[0-9]+}}, 36($sp)
+; MIPS32: sw ${{[0-9]+}}, 32($sp)
+; MIPS32: sw ${{[0-9]+}}, 28($sp)
+; MIPS32: sw ${{[0-9]+}}, 24($sp)
+; MIPS32: sw ${{[0-9]+}}, 20($sp)
+; MIPS32: sw ${{[0-9]+}}, 16($sp)
+; MIPS32: addiu $4, $sp, 48
+; MIPS32: addiu $6, $zero, 0
+; MIPS32: lui $7
+
+; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
+; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
+; MIPS32R5: addiu $4, $sp, 48
+
+; MIPS64-DAG: dsll $4
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS64: jalr $25
+; MIPS32: jal
+
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
+
+; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
+
+; MIPS64-DAG: $2
+; MIPS64-DAG: $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+ %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
+ store <4 x float> %0, <4 x float> * @gv4f32
+ ret void
+}
+
+define void @calldouble_2() {
+entry:
+; ALL-LABEL: calldouble_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: addiu $7, $zero, 0
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+; MIPS64-DAG: daddiu $4
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS32: jal double2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
+; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
+
+; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
+; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
+
+; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
+; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+ %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
+ store <2 x double> %0, <2 x double> * @gv2f64
+ ret void
+}
+
+; The mixed tests show that due to alignment requirements, $5 is not used
+; in argument passing.
+
+define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+entry:
+; ALL-LABEL: mixed_i8:
+
+; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
+; MIPS32: andi $[[R7:[0-9]+]], $6, 255
+; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
+; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
+
+; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+
+; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+
+; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
+; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
+; MIPS64R5: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5: sd $6, {{[0-9]+}}($sp)
+
+ %0 = zext i8 %b to i32
+ %1 = uitofp i32 %0 to float
+ %2 = insertelement <2 x float> undef, float %1, i32 0
+ %3 = insertelement <2 x float> %2, float %1, i32 1
+ %4 = fadd <2 x float> %3, %a
+ %5 = fadd <2 x float> %4, %c
+ %6 = extractelement <2 x float> %5, i32 0
+ %7 = extractelement <2 x float> %5, i32 1
+ %8 = fadd float %6, %7
+ ret float %8
+}
+
+define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+entry:
+; ALL-LABEL: mixed_32:
+
+; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
+; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7
+; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]]
+; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]]
+; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
+; MIPS64R5: insert.d $w[[W0]][1], $5
+; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
+
+ %0 = uitofp i32 %b to float
+ %1 = insertelement <4 x float> undef, float %0, i32 0
+ %2 = insertelement <4 x float> %1, float %0, i32 1
+ %3 = insertelement <4 x float> %2, float %0, i32 2
+ %4 = insertelement <4 x float> %3, float %0, i32 3
+ %5 = fadd <4 x float> %4, %a
+ ret <4 x float> %5
+}
+
+
+; This test is slightly more fragile than I'd like as the offset into the
+; outgoing arguments area is dependent on the size of the stack frame for
+; this function.
+
+define <4 x float> @cast(<4 x i32> %a) {
+entry:
+; ALL-LABEL: cast:
+
+; MIPS32: addiu $sp, $sp, -32
+; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
+
+; MIPS32R5-DAG: insert.w $w0[0], $6
+; MIPS32R5-DAG: insert.w $w0[1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w0[2], $[[R0]]
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w0[3], $[[R1]]
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w0[0], $4
+; MIPS64R5-DAG: insert.d $w0[1], $5
+
+ %0 = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %0
+}
+
+define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+entry:
+; ALL-LABEL: select:
+
+; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
+; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
+; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R1]], 1
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
+; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
+; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
+; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
+; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
+
+; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
+; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
+
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
+
+ %cond.t = trunc <4 x i32> %cond to <4 x i1>
+ %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
+ ret <4 x float> %res
+}
diff --git a/test/CodeGen/Mips/ctlz-v.ll b/test/CodeGen/Mips/ctlz-v.ll
index 3d580e5771f4..156c640681b7 100644
--- a/test/CodeGen/Mips/ctlz-v.ll
+++ b/test/CodeGen/Mips/ctlz-v.ll
@@ -8,10 +8,14 @@ entry:
; MIPS32: clz $2, $4
; MIPS32: clz $3, $5
-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]
%ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
ret <2 x i32> %ret
diff --git a/test/CodeGen/Mips/cttz-v.ll b/test/CodeGen/Mips/cttz-v.ll
index 85f69f9a17d9..dbcde7f5fe5b 100644
--- a/test/CodeGen/Mips/cttz-v.ll
+++ b/test/CodeGen/Mips/cttz-v.ll
@@ -24,14 +24,17 @@ entry:
; MIPS64-DAG: and $[[R2:[0-9]+]], $[[R1]], $[[R0]]
; MIPS64-DAG: clz $[[R3:[0-9]+]], $[[R2]]
; MIPS64-DAG: addiu $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr $ra
-; MIPS64-DAG: subu $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or $2, $[[R8]], $[[R14]]
%ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
ret <2 x i32> %ret
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
index edd6258270a0..90eb14a75b42 100644
--- a/test/CodeGen/Mips/dsp-r1.ll
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -1172,9 +1172,19 @@ entry:
ret { i32 } %.fca.0.insert
}
+define { i32 } @test__builtin_mips_repl_ph2(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.ph
+
+ %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 -2)
+ %1 = bitcast <2 x i16> %0 to i32
+ %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+ ret { i32 } %.fca.0.insert
+}
+
declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone
-define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone {
+define { i32 } @test__builtin_mips_repl_ph3(i32 %i0, i32 %a0) nounwind readnone {
entry:
; CHECK: replv.ph
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index c155eedd62c4..d7f6308ac0b0 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -5,52 +5,63 @@
; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only
; available when -enable-no-nans-fp-math is given.
-; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32,32-NONAN
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R2,32R2-NONAN
-; RUN: llc < %s -march=mipsel -mcpu=mips32r6 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6,32R6-NONAN
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NONAN-NOMADD
; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64,64-NONAN
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R2,64R2-NONAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6,64R6-NONAN
-; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefixes=ALL,32,32-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefixes=ALL,32R2,32R2-NAN
-; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefixes=ALL,32R6,32R6-NAN
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NAN-NOMADD
; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64,64-NAN
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R2,64R2-NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6,64R6-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
+
+; Check that madd.[ds], msub.[ds], nmadd.[ds], and nmsub.[ds] are not generated
+; when the +nomadd4 attribute is specified.
+; The output for mips32 and mips64r6 is reused since the aforementioned
+; instructions are not generated in those cases.
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
entry:
; ALL-LABEL: FOO0float:
-; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 32R2: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2: add.s $f0, $[[T1]], $[[T2]]
+; 32R2: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2: add.s $f0, $[[T1]], $[[T2]]
-; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
+; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
-; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2: add.s $f0, $[[T0]], $[[T1]]
+; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2: add.s $f0, $[[T0]], $[[T1]]
-; 64R6-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13
-; 64R6-DAG: add.s $[[T1:f[0-9]+]], $[[T0]], $f14
-; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: add.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
%mul = fmul float %a, %b
%add = fadd float %mul, %c
@@ -62,35 +73,35 @@ define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
entry:
; ALL-LABEL: FOO1float:
-; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 32R2: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2: add.s $f0, $[[T1]], $[[T2]]
+; 32R2: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2: add.s $f0, $[[T1]], $[[T2]]
-; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
-; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
+; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.s $f0, $[[T0]], $[[T1]]
-; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2: add.s $f0, $[[T0]], $[[T1]]
+; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2: add.s $f0, $[[T0]], $[[T1]]
-; 64R6-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13
-; 64R6-DAG: sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
-; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: add.s $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]]
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
@@ -102,42 +113,42 @@ define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
entry:
; ALL-LABEL: FOO2float:
-; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]]
-; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NONAN: nmadd.s $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN: nmadd.s $f0, $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NAN: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]]
+; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]]
-; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]]
-; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13
+; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13
-; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
+; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
-; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13
+; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13
-; 64R2-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]]
+; 64R2-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]]
-; 64R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]]
%mul = fmul float %a, %b
%add = fadd float %mul, %c
@@ -149,34 +160,34 @@ define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
entry:
; ALL-LABEL: FOO3float:
-; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]]
-; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NONAN: nmsub.s $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN: nmsub.s $f0, $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NAN: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]]
+; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]]
-; 64-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
+; 64-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.s $f0, $[[T1]], $[[T0]]
-; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13
+; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13
-; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]]
+; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]]
-; 64R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: sub.s $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]]
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
@@ -188,36 +199,36 @@ define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
entry:
; ALL-LABEL: FOO10double:
-; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: add.d $f0, $[[T1]], $[[T2]]
-
-; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2: mthc1 $zero, $[[T2]]
-; 32R2: add.d $f0, $[[T1]], $[[T2]]
-
-; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]]
-
-; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
-
-; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2: add.d $f0, $[[T0]], $[[T1]]
-
-; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: add.d $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2: mthc1 $zero, $[[T2]]
+; 32R2: add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
+
+; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2: add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
%mul = fmul double %a, %b
%add = fadd double %mul, %c
@@ -229,36 +240,36 @@ define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
entry:
; ALL-LABEL: FOO11double:
-; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: add.d $f0, $[[T1]], $[[T2]]
-
-; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2: mthc1 $zero, $[[T2]]
-; 32R2: add.d $f0, $[[T1]], $[[T2]]
-
-; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]]
-
-; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
-
-; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2: add.d $f0, $[[T0]], $[[T1]]
-
-; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: add.d $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
+
+; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2: mthc1 $zero, $[[T2]]
+; 32R2: add.d $f0, $[[T1]], $[[T2]]
+
+; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
+
+; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG: add.d $f0, $[[T0]], $[[T1]]
+
+; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2: add.d $f0, $[[T0]], $[[T1]]
+
+; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]]
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
@@ -270,43 +281,43 @@ define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
entry:
; ALL-LABEL: FOO12double:
-; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NONAN: nmadd.d $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN: nmadd.d $f0, $[[T0]], $f12, $f14
-; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NAN: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN: mthc1 $zero, $[[T2]]
-; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
+; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN: mthc1 $zero, $[[T2]]
+; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
-; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13
+; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13
-; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
+; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
-; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13
+; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13
-; 64R2-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]]
+; 64R2-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]]
-; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
%mul = fmul double %a, %b
%add = fadd double %mul, %c
@@ -318,43 +329,43 @@ define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
entry:
; ALL-LABEL: FOO13double:
-; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NONAN: nmsub.d $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN: nmsub.d $f0, $[[T0]], $f12, $f14
-; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NAN: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN: mthc1 $zero, $[[T2]]
-; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
+; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN: mthc1 $zero, $[[T2]]
+; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]]
-; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
-; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13
+; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13
-; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
+; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN: sub.d $f0, $[[T1]], $[[T0]]
-; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13
+; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13
-; 64R2-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]]
+; 64R2-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]]
-; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG: sub.d $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]]
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll
index 20853073dfa6..1562372ce9a0 100644
--- a/test/CodeGen/Mips/llvm-ir/mul.ll
+++ b/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -268,7 +268,7 @@ entry:
; MM64R6: daddu $2, $[[T1]], $[[T0]]
; MM64R6-DAG: dmul $3, $5, $7
- ; MM32: lw $25, %call16(__multi3)($gp)
+ ; MM32: lw $25, %call16(__multi3)($16)
%r = mul i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll
index ee2b212a9f2f..defd25bb41ac 100644
--- a/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -172,7 +172,7 @@ entry:
; 64R6: ddiv $2, $4, $5
; 64R6: teq $5, $zero, 7
- ; MM32: lw $25, %call16(__divdi3)($gp)
+ ; MM32: lw $25, %call16(__divdi3)($2)
; MM64: ddiv $2, $4, $5
; MM64: teq $5, $zero, 7
@@ -184,7 +184,15 @@ entry:
define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) {
entry:
; ALL-LABEL: sdiv_i128:
- ; ALL: l{{w|d}} $25, %call16(__divti3)($gp)
+
+ ; GP32: lw $25, %call16(__divti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__divti3)($gp)
+ ; 64R6: ld $25, %call16(__divti3)($gp)
+
+ ; MM32: lw $25, %call16(__divti3)($16)
+
+ ; MM64: ld $25, %call16(__divti3)($2)
%r = sdiv i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll
index 812c10566979..42664d7457e5 100644
--- a/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -164,7 +164,7 @@ entry:
; 64R6: dmod $2, $4, $5
; 64R6: teq $5, $zero, 7
- ; MM32: lw $25, %call16(__moddi3)($gp)
+ ; MM32: lw $25, %call16(__moddi3)($2)
; MM64: dmod $2, $4, $5
; MM64: teq $5, $zero, 7
@@ -177,7 +177,14 @@ define signext i128 @srem_i128(i128 signext %a, i128 signext %b) {
entry:
; ALL-LABEL: srem_i128:
- ; ALL: l{{w|d}} $25, %call16(__modti3)($gp)
+ ; GP32: lw $25, %call16(__modti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__modti3)($gp)
+ ; 64R6: ld $25, %call16(__modti3)($gp)
+
+ ; MM32: lw $25, %call16(__modti3)($16)
+
+ ; MM64: ld $25, %call16(__modti3)($2)
%r = srem i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll
index 6e078fdedfca..78ab36442a9a 100644
--- a/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -134,7 +134,7 @@ entry:
; 64R6: ddivu $2, $4, $5
; 64R6: teq $5, $zero, 7
- ; MM32: lw $25, %call16(__udivdi3)($gp)
+ ; MM32: lw $25, %call16(__udivdi3)($2)
; MM64: ddivu $2, $4, $5
; MM64: teq $5, $zero, 7
@@ -147,7 +147,14 @@ define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) {
entry:
; ALL-LABEL: udiv_i128:
- ; ALL: l{{w|d}} $25, %call16(__udivti3)($gp)
+ ; GP32: lw $25, %call16(__udivti3)($gp)
+
+ ; GP64-NOT-R6: ld $25, %call16(__udivti3)($gp)
+ ; 64R6: ld $25, %call16(__udivti3)($gp)
+
+ ; MM32: lw $25, %call16(__udivti3)($16)
+
+ ; MM64: ld $25, %call16(__udivti3)($2)
%r = udiv i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll
index 3bc82ceecd2a..160c126c7e3a 100644
--- a/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -190,7 +190,7 @@ entry:
; 64R6: dmodu $2, $4, $5
; 64R6: teq $5, $zero, 7
- ; MM32: lw $25, %call16(__umoddi3)($gp)
+ ; MM32: lw $25, %call16(__umoddi3)($2)
; MM64: dmodu $2, $4, $5
; MM64: teq $5, $zero, 7
@@ -208,9 +208,9 @@ entry:
; GP64-NOT-R6: ld $25, %call16(__umodti3)($gp)
; 64R6: ld $25, %call16(__umodti3)($gp)
- ; MM32: lw $25, %call16(__umodti3)($gp)
+ ; MM32: lw $25, %call16(__umodti3)($16)
- ; MM64: ld $25, %call16(__umodti3)($gp)
+ ; MM64: ld $25, %call16(__umodti3)($2)
%r = urem i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/micromips-gp-rc.ll b/test/CodeGen/Mips/micromips-gp-rc.ll
index 16e55c357db6..f139f7a8486d 100644
--- a/test/CodeGen/Mips/micromips-gp-rc.ll
+++ b/test/CodeGen/Mips/micromips-gp-rc.ll
@@ -14,5 +14,5 @@ entry:
; Function Attrs: noreturn
declare void @exit(i32 signext)
-; CHECK: addu $gp, ${{[0-9]+}}
+; CHECK: move $gp, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index 6fa506849ee6..564ffdd2f691 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
@f0 = common global float 0.000000e+00, align 4
@d0 = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/pbqp-reserved-physreg.ll b/test/CodeGen/Mips/pbqp-reserved-physreg.ll
new file mode 100644
index 000000000000..eedc51bd1e57
--- /dev/null
+++ b/test/CodeGen/Mips/pbqp-reserved-physreg.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=mips -regalloc=pbqp <%s > %t
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+
+; Function Attrs: nounwind
+define void @ham.928() local_unnamed_addr #0 align 2 {
+bb:
+ switch i32 undef, label %bb35 [
+ i32 1, label %bb18
+ i32 0, label %bb19
+ i32 3, label %bb20
+ i32 2, label %bb21
+ i32 4, label %bb17
+ ]
+
+bb17: ; preds = %bb
+ unreachable
+
+bb18: ; preds = %bb
+ unreachable
+
+bb19: ; preds = %bb
+ unreachable
+
+bb20: ; preds = %bb
+ unreachable
+
+bb21: ; preds = %bb
+ unreachable
+
+bb35: ; preds = %bb
+ unreachable
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll
index 08eddf370096..c59695d18734 100644
--- a/test/CodeGen/Mips/return-vector.ll
+++ b/test/CodeGen/Mips/return-vector.ll
@@ -128,8 +128,11 @@ entry:
; CHECK-LABEL: call_f2:
; CHECK: call16(f2)
-; CHECK-NOT: lwc1
-; CHECK: add.s $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK: addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG: lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG: lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK: add.s $f0, $f[[F0]], $f[[F1]]
+
}
@@ -143,11 +146,12 @@ entry:
; CHECK-LABEL: call_d2:
; CHECK: call16(d2)
-; CHECK-NOT: ldc1
-; CHECK: add.d $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
-}
-
+; CHECK: addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG: ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK: add.d $f0, $f[[F1]], $f[[F0]]
+}
; Check that function returns vector on stack in cases when vector can't be
; returned in registers. Also check that vector is placed on stack starting
@@ -179,11 +183,12 @@ entry:
ret <4 x float> %vecins4
; CHECK-LABEL: return_f4:
-; CHECK-DAG: lwc1 $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG: swc1 $[[R0]], 12($4)
+; CHECK-DAG: lwc1 $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG: swc1 $f[[R0]], 12($4)
; CHECK-DAG: sw $7, 8($4)
; CHECK-DAG: sw $6, 4($4)
; CHECK-DAG: sw $5, 0($4)
+
}
@@ -227,8 +232,8 @@ entry:
ret <2 x float> %vecins2
; CHECK-LABEL: return_f2:
-; CHECK: mov.s $f0, $f12
-; CHECK: mov.s $f2, $f14
+; CHECK-DAG: sw $5, 0($4)
+; CHECK-DAG: sw $6, 4($4)
}
@@ -239,6 +244,10 @@ entry:
ret <2 x double> %vecins2
; CHECK-LABEL: return_d2:
-; CHECK: mov.d $f0, $f12
-; CHECK: mov.d $f2, $f14
+; CHECK-DAG: ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG: sdc1 $f[[F0]], 8($4)
+; CHECK-DAG: mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG: mtc1 $7, $f
+; CHECK-DAG: sdc1 $f[[F0]], 0($4)
+
}
diff --git a/test/CodeGen/Mips/tailcall/tailcall.ll b/test/CodeGen/Mips/tailcall/tailcall.ll
index 01a9b64ba63c..3f04e1cf3053 100644
--- a/test/CodeGen/Mips/tailcall/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall/tailcall.ll
@@ -176,7 +176,7 @@ entry:
; ALL-LABEL: caller8_1:
; PIC32: jalr $25
; PIC32R6: jalr $25
-; PIC32MM: jalr{{.*}} $25
+; PIC32MM: jalr $25
; STATIC32: jal
; PIC64: jalr $25
; STATIC64: jal
@@ -288,7 +288,7 @@ entry:
; ALL-LABEL: caller13:
; PIC32: jalr $25
; PIC32R6: jalr $25
-; PIC32MM: jalr{{.*}} $25
+; PIC32MM: jalr $25
; STATIC32: jal
; STATIC64: jal
; PIC64R6: jalr $25
diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
new file mode 100644
index 000000000000..14669b9005b7
--- /dev/null
+++ b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=32442
+; Don't generate zero extension for the return value.
+; CHECK-NOT: clrldi
+
+define zeroext i1 @foo(i32 signext %i, i32* %p) {
+entry:
+ %cmp = icmp eq i32 %i, 0
+ br i1 %cmp, label %return, label %if.end
+
+if.end:
+ store i32 %i, i32* %p, align 4
+ br label %return
+
+return:
+ %retval = phi i1 [ true, %if.end ], [ false, %entry ]
+ ret i1 %retval
+}
diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
index 4a0966b2859f..fd515281e394 100644
--- a/test/CodeGen/PowerPC/BoolRetToIntTest.ll
+++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
@@ -31,14 +31,14 @@ for.body: ; preds = %for.body.preheader,
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
ret i1 %cleanup.dest.slot.0
}
@@ -78,14 +78,14 @@ for.body: ; preds = %for.body.preheader,
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: call void %cont(i1 [[REG]]
tail call void %cont(i1 %cleanup.dest.slot.0)
ret void
@@ -112,17 +112,17 @@ for.body: ; preds = %for.body.preheader,
br i1 %call, label %cleanup.loopexit, label %for.cond
cleanup.loopexit: ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
%cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
br label %cleanup
cleanup: ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
%cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: call void %cont(i1 [[REG]]
tail call void %cont(i1 %cleanup.dest.slot.0)
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
ret i1 %cleanup.dest.slot.0
}
@@ -136,7 +136,7 @@ foo:
br label %cleanup
cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
%result = phi i1 [ false, %foo ], [ %operand, %entry ]
ret i1 %result
@@ -186,7 +186,7 @@ foo:
; CHECK-LABEL: cleanup
cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
%result = phi i1 [ %bar, %foo], [ %operand, %entry ]
ret i1 %result
@@ -198,8 +198,8 @@ declare zeroext i1 @return_i1()
define zeroext i1 @call_test() {
; CHECK: [[REG:%.+]] = call i1
%result = call i1 @return_i1()
-; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
; CHECK: ret i1 [[REG]]
ret i1 %result
}
diff --git a/test/CodeGen/PowerPC/crbits.ll b/test/CodeGen/PowerPC/crbits.ll
index a85237195c5e..4ae91d1163a4 100644
--- a/test/CodeGen/PowerPC/crbits.ll
+++ b/test/CodeGen/PowerPC/crbits.ll
@@ -94,13 +94,15 @@ entry:
ret i1 %or7
; CHECK-LABEL: @test5
+; CHECK-DAG: li [[NEG2:[0-9]+]], -2
; CHECK-DAG: and [[REG1:[0-9]+]], 3, 4
-; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2
-; CHECK-DAG: li [[REG3:[0-9]+]], 1
-; CHECK-DAG: andi. {{[0-9]+}}, [[REG1]], 1
-; CHECK-DAG: crandc [[REG5:[0-9]+]],
-; CHECK: isel 3, 0, [[REG3]], [[REG5]]
-; CHECK: blr
+; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]]
+; CHECK-DAG: clrldi [[TRUNC:[0-9]+]], [[REG1]], 63
+; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]]
+; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5
+; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1
+; CHECK: or 3, [[TRUNC]], [[NE4]]
+; CHECK-NEXT: blr
}
; Function Attrs: nounwind readnone
@@ -112,15 +114,16 @@ entry:
ret i1 %and7
; CHECK-LABEL: @test6
-; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
-; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2
-; CHECK-DAG: crmove [[REG1:[0-9]+]], 1
-; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG4:[0-9]+]], 1,
-; CHECK-DAG: crnand [[REG5:[0-9]+]], [[REG4]], [[REG1]]
-; CHECK: isel 3, 0, [[REG2]], [[REG5]]
-; CHECK: blr
+; CHECK-DAG: li [[NEG2:[0-9]+]], -2
+; CHECK-DAG: clrldi [[CLR1:[0-9]+]], 4, 63
+; CHECK-DAG: clrldi [[CLR2:[0-9]+]], 3, 63
+; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]]
+; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]]
+; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5
+; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1
+; CHECK: or [[OR:[0-9]+]], [[NE4]], [[CLR1]]
+; CHECK: and 3, [[OR]], [[CLR2]]
+; CHECK-NEXT: blr
}
; Function Attrs: nounwind readnone
@@ -187,12 +190,13 @@ entry:
ret i32 %and
; CHECK-LABEL: @test10
-; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0
-; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG3:[0-9]+]],
-; CHECK: isel 3, 0, [[REG2]], [[REG3]]
-; CHECK: blr
+; CHECK-DAG: cntlzw 3, 3
+; CHECK-DAG: cntlzw 4, 4
+; CHECK-DAG: srwi 3, 3, 5
+; CHECK-DAG: srwi 4, 4, 5
+; CHECK: xori 3, 3, 1
+; CHECK: and 3, 3, 4
+; CHECK-NEXT: blr
}
attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/logic-ops-on-compares.ll b/test/CodeGen/PowerPC/logic-ops-on-compares.ll
index df021c20ea86..5a507e9ff678 100644
--- a/test/CodeGen/PowerPC/logic-ops-on-compares.ll
+++ b/test/CodeGen/PowerPC/logic-ops-on-compares.ll
@@ -40,8 +40,8 @@ return: ; preds = %if.end, %if.then
ret i32 %retval.0
}
-define void @neg_truncate_i32(i32 *%ptr) {
-; CHECK-LABEL: neg_truncate_i32:
+define void @neg_truncate_i32_eq(i32 *%ptr) {
+; CHECK-LABEL: neg_truncate_i32_eq:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: rldicl. r3, r3, 0, 63
@@ -66,8 +66,8 @@ if.end29: ; preds = %if.else
}
; Function Attrs: nounwind
-define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: logic_ne_64:
+define i64 @logic_eq_64(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: logic_eq_64:
; CHECK: xor r7, r3, r4
; CHECK-NEXT: li r6, 55
; CHECK-NEXT: xor r5, r5, r6
@@ -99,8 +99,8 @@ return: ; preds = %if.end, %if.then
ret i64 %retval.0
}
-define void @neg_truncate_i64(i64 *%ptr) {
-; CHECK-LABEL: neg_truncate_i64:
+define void @neg_truncate_i64_eq(i64 *%ptr) {
+; CHECK-LABEL: neg_truncate_i64_eq:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: rldicl. r3, r3, 0, 63
@@ -124,6 +124,67 @@ if.end29: ; preds = %if.else
}
+; Function Attrs: nounwind
+define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: logic_ne_64:
+; CHECK: xor r7, r3, r4
+; CHECK-NEXT: li r6, 55
+; CHECK-NEXT: addic r8, r7, -1
+; CHECK-NEXT: xor r5, r5, r6
+; CHECK-NEXT: subfe r7, r8, r7
+; CHECK-NEXT: cntlzd r5, r5
+; CHECK-NEXT: addic r12, r4, -1
+; CHECK-NEXT: rldicl r5, r5, 58, 63
+; CHECK-NEXT: subfe r6, r12, r4
+; CHECK-NEXT: and r6, r7, r6
+; CHECK-NEXT: or. r5, r6, r5
+; CHECK-NEXT: bc 4, 1
+entry:
+ %tobool = icmp ne i64 %a, %b
+ %tobool1 = icmp ne i64 %b, 0
+ %or.cond = and i1 %tobool, %tobool1
+ %tobool3 = icmp eq i64 %c, 55
+ %or.cond5 = or i1 %or.cond, %tobool3
+ br i1 %or.cond5, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i64 @foo64(i64 %a) #2
+ br label %return
+
+if.end: ; preds = %entry
+ %call4 = tail call i64 @bar64(i64 %b) #2
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval.0 = phi i64 [ %call4, %if.end ], [ %call, %if.then ]
+ ret i64 %retval.0
+}
+
+define void @neg_truncate_i64_ne(i64 *%ptr) {
+; CHECK-LABEL: neg_truncate_i64_ne:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: ld r3, 0(r3)
+; CHECK-NEXT: andi. r3, r3, 1
+; CHECK-NEXT: bclr 12, 1, 0
+; CHECK-NEXT: # BB#1: # %if.end29.thread136
+; CHECK-NEXT: .LBB5_2: # %if.end29
+entry:
+ %0 = load i64, i64* %ptr, align 4
+ %rem17127 = and i64 %0, 1
+ %cmp18 = icmp ne i64 %rem17127, 0
+ br label %if.else
+
+if.else: ; preds = %entry
+ br i1 %cmp18, label %if.end29, label %if.end29.thread136
+
+if.end29.thread136: ; preds = %if.else
+ unreachable
+
+if.end29: ; preds = %if.else
+ ret void
+
+}
+
declare signext i32 @foo(i32 signext)
declare signext i32 @bar(i32 signext)
declare i64 @foo64(i64)
diff --git a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index 3095429758f6..ad9078c82066 100644
--- a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@@ -11,111 +12,237 @@ target triple = "powerpc64le-unknown-linux-gnu"
@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4
@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4
-; Function Attrs: nounwind readonly
declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1
-; Validate with if(memcmp())
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest01() local_unnamed_addr #0 {
-entry:
- %call = tail call signext i32 @memcmp(i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer1 to i8*), i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer2 to i8*), i64 16)
- %not.tobool = icmp ne i32 %call, 0
- %. = zext i1 %not.tobool to i32
+; Check 4 bytes - requires 1 load for each param.
+define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest02:
+; CHECK: # BB#0:
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: lwz 4, 0(4)
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
+; CHECK-NEXT: blr
+ %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4)
+ %not.cmp = icmp ne i32 %call, 0
+ %. = zext i1 %not.cmp to i32
ret i32 %.
-
- ; CHECK-LABEL: @zeroEqualityTest01
- ; CHECK-LABEL: %res_block
- ; CHECK: li 3, 1
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
- ; CHECK: li 3, 0
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
}
-; Validate with if(memcmp() == 0)
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest02() local_unnamed_addr #0 {
-entry:
- %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
- %not.cmp = icmp ne i32 %call, 0
- %. = zext i1 %not.cmp to i32
+; Check 16 bytes - requires 2 loads for each param (or use vectors?).
+define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest01:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: ld 5, 0(3)
+; CHECK-NEXT: ld 6, 0(4)
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne 0, .LBB1_2
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: ld 3, 8(3)
+; CHECK-NEXT: ld 4, 8(4)
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: beq 0, .LBB1_3
+; CHECK-NEXT: .LBB1_2: # %res_block
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB1_3: # %endblock
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+ %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
+ %not.tobool = icmp ne i32 %call, 0
+ %. = zext i1 %not.tobool to i32
ret i32 %.
+}
- ; CHECK-LABEL: @zeroEqualityTest02
- ; CHECK-LABEL: %res_block
- ; CHECK: li 3, 1
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
- ; CHECK: li 3, 0
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
+; Check 7 bytes - requires 3 loads for each param.
+define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest03:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: lwz 5, 0(3)
+; CHECK-NEXT: lwz 6, 0(4)
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB2_3
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: lhz 5, 4(3)
+; CHECK-NEXT: lhz 6, 4(4)
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB2_3
+; CHECK-NEXT: # BB#2: # %loadbb2
+; CHECK-NEXT: lbz 3, 6(3)
+; CHECK-NEXT: lbz 4, 6(4)
+; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: beq 0, .LBB2_4
+; CHECK-NEXT: .LBB2_3: # %res_block
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB2_4: # %endblock
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+ %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
+ %not.lnot = icmp ne i32 %call, 0
+ %cond = zext i1 %not.lnot to i32
+ ret i32 %cond
}
; Validate with > 0
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest03() local_unnamed_addr #0 {
-entry:
+define signext i32 @zeroEqualityTest04() {
+; CHECK-LABEL: zeroEqualityTest04:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
+; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
+; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
+; CHECK-NEXT: ldbrx 3, 0, 6
+; CHECK-NEXT: ldbrx 4, 0, 5
+; CHECK-NEXT: subf. 7, 4, 3
+; CHECK-NEXT: bne 0, .LBB3_2
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: li 4, 8
+; CHECK-NEXT: ldbrx 3, 6, 4
+; CHECK-NEXT: ldbrx 4, 5, 4
+; CHECK-NEXT: subf. 5, 4, 3
+; CHECK-NEXT: beq 0, .LBB3_4
+; CHECK-NEXT: .LBB3_2: # %res_block
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: isel 3, 12, 3, 0
+; CHECK-NEXT: .LBB3_3: # %endblock
+; CHECK-NEXT: cmpwi 3, 1
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: isel 3, 4, 3, 0
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: b .LBB3_3
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
%not.cmp = icmp slt i32 %call, 1
%. = zext i1 %not.cmp to i32
ret i32 %.
-
- ; CHECK-LABEL: @zeroEqualityTest03
- ; CHECK-LABEL: %res_block
- ; CHECK: cmpld
- ; CHECK-NEXT: li [[LI:[0-9]+]], 1
- ; CHECK-NEXT: li [[LI2:[0-9]+]], -1
- ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
}
; Validate with < 0
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest04() local_unnamed_addr #0 {
-entry:
+define signext i32 @zeroEqualityTest05() {
+; CHECK-LABEL: zeroEqualityTest05:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha
+; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
+; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
+; CHECK-NEXT: ldbrx 3, 0, 6
+; CHECK-NEXT: ldbrx 4, 0, 5
+; CHECK-NEXT: subf. 7, 4, 3
+; CHECK-NEXT: bne 0, .LBB4_2
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: li 4, 8
+; CHECK-NEXT: ldbrx 3, 6, 4
+; CHECK-NEXT: ldbrx 4, 5, 4
+; CHECK-NEXT: subf. 5, 4, 3
+; CHECK-NEXT: beq 0, .LBB4_4
+; CHECK-NEXT: .LBB4_2: # %res_block
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: li 12, -1
+; CHECK-NEXT: isel 3, 12, 3, 0
+; CHECK-NEXT: .LBB4_3: # %endblock
+; CHECK-NEXT: srwi 3, 3, 31
+; CHECK-NEXT: xori 3, 3, 1
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+; CHECK-NEXT: .LBB4_4:
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: b .LBB4_3
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
%call.lobit = lshr i32 %call, 31
%call.lobit.not = xor i32 %call.lobit, 1
ret i32 %call.lobit.not
-
- ; CHECK-LABEL: @zeroEqualityTest04
- ; CHECK-LABEL: %res_block
- ; CHECK: cmpld
- ; CHECK-NEXT: li [[LI:[0-9]+]], 1
- ; CHECK-NEXT: li [[LI2:[0-9]+]], -1
- ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
}
; Validate with memcmp()?:
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest05() local_unnamed_addr #0 {
-entry:
+define signext i32 @equalityFoldTwoConstants() {
+; CHECK-LABEL: equalityFoldTwoConstants:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
+; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
+; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: bne 0, .LBB5_2
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
+; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
+; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: beq 0, .LBB5_3
+; CHECK-NEXT: .LBB5_2: # %res_block
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: .LBB5_3: # %endblock
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
%not.tobool = icmp eq i32 %call, 0
%cond = zext i1 %not.tobool to i32
ret i32 %cond
-
- ; CHECK-LABEL: @zeroEqualityTest05
- ; CHECK-LABEL: %res_block
- ; CHECK: li 3, 1
- ; CHECK: li 3, 0
}
-; Validate with !memcmp()?:
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest06() local_unnamed_addr #0 {
-entry:
- %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
- %not.lnot = icmp ne i32 %call, 0
- %cond = zext i1 %not.lnot to i32
+define signext i32 @equalityFoldOneConstant(i8* %X) {
+; CHECK-LABEL: equalityFoldOneConstant:
+; CHECK: # BB#0: # %loadbb
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT: ld 5, 0(3)
+; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l(4)
+; CHECK-NEXT: cmpld 4, 5
+; CHECK-NEXT: bne 0, .LBB6_2
+; CHECK-NEXT: # BB#1: # %loadbb1
+; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT: ld 3, 8(3)
+; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4)
+; CHECK-NEXT: cmpld 4, 3
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: beq 0, .LBB6_3
+; CHECK-NEXT: .LBB6_2: # %res_block
+; CHECK-NEXT: li 3, 1
+; CHECK-NEXT: .LBB6_3: # %endblock
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: blr
+ %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16)
+ %not.tobool = icmp eq i32 %call, 0
+ %cond = zext i1 %not.tobool to i32
ret i32 %cond
+}
- ; CHECK-LABEL: @zeroEqualityTest06
- ; CHECK-LABEL: %res_block
- ; CHECK: li 3, 1
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
- ; CHECK: li 3, 0
- ; CHECK-NEXT: clrldi
- ; CHECK-NEXT: blr
+define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) {
+; CHECK-LABEL: length2_eq_nobuiltin_attr:
+; CHECK: # BB#0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -32(1)
+; CHECK-NEXT: .Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .Lcfi1:
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: li 5, 2
+; CHECK-NEXT: bl memcmp
+; CHECK-NEXT: nop
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: addi 1, 1, 32
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+ %m = tail call signext i32 @memcmp(i8* %X, i8* %Y, i64 2) nobuiltin
+ %c = icmp eq i32 %m, 0
+ ret i1 %c
}
+
diff --git a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
index 0e7f8f1bc668..c403b5ac2e5a 100644
--- a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
+++ b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
@@ -37,17 +37,13 @@ entry:
; CHECK-LABEL: @crbitson
; CHECK-NO-ISEL-LABEL: @crbitson
-; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0
-; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG3:[0-9]+]],
-; CHECK: isel 3, 0, [[REG2]], [[REG3]]
-; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]]
-; CHECK-NO-ISEL-NEXT: blr
-; CHECK-NO-ISEL: [[TRUE]]
-; CHECK-NO-ISEL-NEXT: addi 3, 0, 0
-; CHECK-NO-ISEL-NEXT: blr
-; CHECK: blr
+; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3
+; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4
+; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5
+; CHECK: srwi [[REG4:[0-9]+]], [[REG2]], 5
+; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1
+; CHECK: and 3, [[REG5]], [[REG4]]
+; CHECK-NEXT: blr
}
diff --git a/test/CodeGen/PowerPC/setcc-logic.ll b/test/CodeGen/PowerPC/setcc-logic.ll
index a5a86f101a94..8a6f4975ec97 100644
--- a/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/test/CodeGen/PowerPC/setcc-logic.ll
@@ -59,8 +59,8 @@ define zeroext i1 @any_bits_set(i32 %P, i32 %Q) {
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%a = icmp ne i32 %P, 0
%b = icmp ne i32 %Q, 0
@@ -83,10 +83,12 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) {
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear:
; CHECK: # BB#0:
+; CHECK-NEXT: li 5, -1
; CHECK-NEXT: and 3, 3, 4
-; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: cmpwi 0, 3, -1
-; CHECK-NEXT: isel 3, 0, 5, 2
+; CHECK-NEXT: xor 3, 3, 5
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
@@ -452,8 +454,8 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d
diff --git a/test/CodeGen/PowerPC/testComparesinesc.ll b/test/CodeGen/PowerPC/testComparesinesc.ll
new file mode 100644
index 000000000000..e6ade339573b
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesc.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i8 0, align 1
+
+define signext i32 @test_inesc(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: test_inesc:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define signext i32 @test_inesc_sext(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: test_inesc_sext:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define signext i32 @test_inesc_z(i8 signext %a) {
+; CHECK-LABEL: test_inesc_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0
+ %conv1 = zext i1 %cmp to i32
+ ret i32 %conv1
+}
+
+define signext i32 @test_inesc_sext_z(i8 signext %a) {
+; CHECK-LABEL: test_inesc_sext_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define void @test_inesc_store(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: test_inesc_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b
+ %conv3 = zext i1 %cmp to i8
+ store i8 %conv3, i8* @glob, align 1
+ ret void
+}
+
+define void @test_inesc_sext_store(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: test_inesc_sext_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b
+ %conv3 = sext i1 %cmp to i8
+ store i8 %conv3, i8* @glob, align 1
+ ret void
+}
+
+define void @test_inesc_z_store(i8 signext %a) {
+; CHECK-LABEL: test_inesc_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0
+ %conv2 = zext i1 %cmp to i8
+ store i8 %conv2, i8* @glob, align 1
+ ret void
+}
+
+define void @test_inesc_sext_z_store(i8 signext %a) {
+; CHECK-LABEL: test_inesc_sext_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0
+ %conv2 = sext i1 %cmp to i8
+ store i8 %conv2, i8* @glob, align 1
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesinesi.ll b/test/CodeGen/PowerPC/testComparesinesi.ll
new file mode 100644
index 000000000000..ad9431c09e33
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesi.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i32 0, align 4
+
+define signext i32 @test_inesi(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: test_inesi:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define signext i32 @test_inesi_sext(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: test_inesi_sext:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define signext i32 @test_inesi_z(i32 signext %a) {
+; CHECK-LABEL: test_inesi_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define signext i32 @test_inesi_sext_z(i32 signext %a) {
+; CHECK-LABEL: test_inesi_sext_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define void @test_inesi_store(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: test_inesi_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @glob, align 4
+ ret void
+}
+
+define void @test_inesi_sext_store(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: test_inesi_sext_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %sub = sext i1 %cmp to i32
+ store i32 %sub, i32* @glob, align 4
+ ret void
+}
+
+define void @test_inesi_z_store(i32 signext %a) {
+; CHECK-LABEL: test_inesi_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @glob, align 4
+ ret void
+}
+
+define void @test_inesi_sext_z_store(i32 signext %a) {
+; CHECK-LABEL: test_inesi_sext_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0
+ %sub = sext i1 %cmp to i32
+ store i32 %sub, i32* @glob, align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesinesll.ll b/test/CodeGen/PowerPC/testComparesinesll.ll
new file mode 100644
index 000000000000..9e9369455857
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesll.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define signext i32 @test_inesll(i64 %a, i64 %b) {
+; CHECK-LABEL: test_inesll:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define signext i32 @test_inesll_sext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_inesll_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define signext i32 @test_inesll_z(i64 %a) {
+; CHECK-LABEL: test_inesll_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define signext i32 @test_inesll_sext_z(i64 %a) {
+; CHECK-LABEL: test_inesll_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define void @test_inesll_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_inesll_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b
+ %conv1 = zext i1 %cmp to i64
+ store i64 %conv1, i64* @glob, align 8
+ ret void
+}
+
+define void @test_inesll_sext_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_inesll_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b
+ %conv1 = sext i1 %cmp to i64
+ store i64 %conv1, i64* @glob, align 8
+ ret void
+}
+
+define void @test_inesll_z_store(i64 %a) {
+; CHECK-LABEL: test_inesll_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0
+ %conv1 = zext i1 %cmp to i64
+ store i64 %conv1, i64* @glob, align 8
+ ret void
+}
+
+define void @test_inesll_sext_z_store(i64 %a) {
+; CHECK-LABEL: test_inesll_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0
+ %conv1 = sext i1 %cmp to i64
+ store i64 %conv1, i64* @glob, align 8
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesiness.ll b/test/CodeGen/PowerPC/testComparesiness.ll
new file mode 100644
index 000000000000..56b7a6ab3974
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesiness.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i16 0, align 2
+
+define signext i32 @test_iness(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: test_iness:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define signext i32 @test_iness_sext(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: test_iness_sext:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define signext i32 @test_iness_z(i16 signext %a) {
+; CHECK-LABEL: test_iness_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0
+ %conv1 = zext i1 %cmp to i32
+ ret i32 %conv1
+}
+
+define signext i32 @test_iness_sext_z(i16 signext %a) {
+; CHECK-LABEL: test_iness_sext_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0
+ %sub = sext i1 %cmp to i32
+ ret i32 %sub
+}
+
+define void @test_iness_store(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: test_iness_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b
+ %conv3 = zext i1 %cmp to i16
+ store i16 %conv3, i16* @glob, align 2
+ ret void
+}
+
+define void @test_iness_sext_store(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: test_iness_sext_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b
+ %conv3 = sext i1 %cmp to i16
+ store i16 %conv3, i16* @glob, align 2
+ ret void
+}
+
+define void @test_iness_z_store(i16 signext %a) {
+; CHECK-LABEL: test_iness_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0
+ %conv2 = zext i1 %cmp to i16
+ store i16 %conv2, i16* @glob, align 2
+ ret void
+}
+
+define void @test_iness_sext_z_store(i16 signext %a) {
+; CHECK-LABEL: test_iness_sext_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0
+ %conv2 = sext i1 %cmp to i16
+ store i16 %conv2, i16* @glob, align 2
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineuc.ll b/test/CodeGen/PowerPC/testComparesineuc.ll
new file mode 100644
index 000000000000..1cba13f12292
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineuc.ll
@@ -0,0 +1,136 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+@glob = common local_unnamed_addr global i8 0, align 1
+
+define signext i32 @test_ineuc(i8 zeroext %a, i8 zeroext %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_ineuc:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b ; i1 flag: %a differs from %b
+ %conv2 = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv2
+}
+
+define signext i32 @test_ineuc_sext(i8 zeroext %a, i8 zeroext %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_ineuc_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b ; i1 flag: %a differs from %b
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define signext i32 @test_ineuc_z(i8 zeroext %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_ineuc_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv1
+}
+
+define signext i32 @test_ineuc_sext_z(i8 zeroext %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_ineuc_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0 ; i1 flag: %a is non-zero
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define void @test_ineuc_store(i8 zeroext %a, i8 zeroext %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineuc_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b ; i1 flag: %a differs from %b
+ %conv3 = zext i1 %cmp to i8 ; zero-extend the flag: true becomes 1
+ store i8 %conv3, i8* @glob, align 1 ; write the widened flag to the i8 global
+ ret void
+}
+
+define void @test_ineuc_sext_store(i8 zeroext %a, i8 zeroext %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineuc_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, %b ; i1 flag: %a differs from %b
+ %conv3 = sext i1 %cmp to i8 ; sign-extend the flag: true becomes -1
+ store i8 %conv3, i8* @glob, align 1 ; write the widened flag to the i8 global
+ ret void
+}
+
+define void @test_ineuc_z_store(i8 zeroext %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineuc_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0 ; i1 flag: %a is non-zero
+ %conv2 = zext i1 %cmp to i8 ; zero-extend the flag: true becomes 1
+ store i8 %conv2, i8* @glob, align 1 ; write the widened flag to the i8 global
+ ret void
+}
+
+define void @test_ineuc_sext_z_store(i8 zeroext %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineuc_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i8 %a, 0 ; i1 flag: %a is non-zero
+ %conv2 = sext i1 %cmp to i8 ; sign-extend the flag: true becomes -1
+ store i8 %conv2, i8* @glob, align 1 ; write the widened flag to the i8 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineui.ll b/test/CodeGen/PowerPC/testComparesineui.ll
new file mode 100644
index 000000000000..36899b7ea8e1
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineui.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i32 0, align 4
+
+define signext i32 @test_ineui(i32 zeroext %a, i32 zeroext %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_ineui:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b ; i1 flag: %a differs from %b
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv
+}
+
+define signext i32 @test_ineui_sext(i32 zeroext %a, i32 zeroext %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_ineui_sext:
+; CHECK: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b ; i1 flag: %a differs from %b
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define signext i32 @test_ineui_z(i32 zeroext %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_ineui_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0 ; i1 flag: %a is non-zero
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv
+}
+
+define signext i32 @test_ineui_sext_z(i32 zeroext %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_ineui_sext_z:
+; CHECK: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0 ; i1 flag: %a is non-zero
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define void @test_ineui_store(i32 zeroext %a, i32 zeroext %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineui_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b ; i1 flag: %a differs from %b
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ store i32 %conv, i32* @glob, align 4 ; write the widened flag to the i32 global
+ ret void
+}
+
+define void @test_ineui_sext_store(i32 zeroext %a, i32 zeroext %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineui_sext_store:
+; CHECK: xor r3, r3, r4
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, %b ; i1 flag: %a differs from %b
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ store i32 %sub, i32* @glob, align 4 ; write the widened flag to the i32 global
+ ret void
+}
+
+define void @test_ineui_z_store(i32 zeroext %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineui_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0 ; i1 flag: %a is non-zero
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ store i32 %conv, i32* @glob, align 4 ; write the widened flag to the i32 global
+ ret void
+}
+
+define void @test_ineui_sext_z_store(i32 zeroext %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineui_sext_z_store:
+; CHECK: cntlzw r3, r3
+; CHECK: srwi r3, r3, 5
+; CHECK: xori r3, r3, 1
+; CHECK: neg r3, r3
+; CHECK: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i32 %a, 0 ; i1 flag: %a is non-zero
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ store i32 %sub, i32* @glob, align 4 ; write the widened flag to the i32 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineull.ll b/test/CodeGen/PowerPC/testComparesineull.ll
new file mode 100644
index 000000000000..7f0fed15157c
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineull.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define signext i32 @test_ineull(i64 %a, i64 %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_ineull:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv
+}
+
+define signext i32 @test_ineull_sext(i64 %a, i64 %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_ineull_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define signext i32 @test_ineull_z(i64 %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_ineull_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv
+}
+
+define signext i32 @test_ineull_sext_z(i64 %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_ineull_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define void @test_ineull_store(i64 %a, i64 %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineull_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_ineull_sext_store(i64 %a, i64 %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineull_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_ineull_z_store(i64 %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineull_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_ineull_sext_z_store(i64 %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineull_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineus.ll b/test/CodeGen/PowerPC/testComparesineus.ll
new file mode 100644
index 000000000000..d24d854f31c9
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineus.ll
@@ -0,0 +1,137 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i16 0, align 2
+
+define signext i32 @test_ineus(i16 zeroext %a, i16 zeroext %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_ineus:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b ; i1 flag: %a differs from %b
+ %conv2 = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv2
+}
+
+define signext i32 @test_ineus_sext(i16 zeroext %a, i16 zeroext %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_ineus_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b ; i1 flag: %a differs from %b
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define signext i32 @test_ineus_z(i16 zeroext %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_ineus_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i32 ; zero-extend the flag: true becomes 1
+ ret i32 %conv1
+}
+
+define signext i32 @test_ineus_sext_z(i16 zeroext %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_ineus_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0 ; i1 flag: %a is non-zero
+ %sub = sext i1 %cmp to i32 ; sign-extend the flag: true becomes -1
+ ret i32 %sub
+}
+
+define void @test_ineus_store(i16 zeroext %a, i16 zeroext %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineus_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b ; i1 flag: %a differs from %b
+ %conv3 = zext i1 %cmp to i16 ; zero-extend the flag: true becomes 1
+ store i16 %conv3, i16* @glob, align 2 ; write the widened flag to the i16 global
+ ret void
+}
+
+define void @test_ineus_sext_store(i16 zeroext %a, i16 zeroext %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_ineus_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, %b ; i1 flag: %a differs from %b
+ %conv3 = sext i1 %cmp to i16 ; sign-extend the flag: true becomes -1
+ store i16 %conv3, i16* @glob, align 2 ; write the widened flag to the i16 global
+ ret void
+}
+
+define void @test_ineus_z_store(i16 zeroext %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineus_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0 ; i1 flag: %a is non-zero
+ %conv2 = zext i1 %cmp to i16 ; zero-extend the flag: true becomes 1
+ store i16 %conv2, i16* @glob, align 2 ; write the widened flag to the i16 global
+ ret void
+}
+
+define void @test_ineus_sext_z_store(i16 zeroext %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_ineus_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: cntlzw r3, r3
+; CHECK-NEXT: srwi r3, r3, 5
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: xori r3, r3, 1
+; CHECK-NEXT: neg r3, r3
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i16 %a, 0 ; i1 flag: %a is non-zero
+ %conv2 = sext i1 %cmp to i16 ; sign-extend the flag: true becomes -1
+ store i16 %conv2, i16* @glob, align 2 ; write the widened flag to the i16 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesllnesll.ll b/test/CodeGen/PowerPC/testComparesllnesll.ll
new file mode 100644
index 000000000000..d87ff55739fc
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesllnesll.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define i64 @test_llnesll(i64 %a, i64 %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_llnesll:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ ret i64 %conv1
+}
+
+define i64 @test_llnesll_sext(i64 %a, i64 %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_llnesll_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ ret i64 %conv1
+}
+
+define i64 @test_llnesll_z(i64 %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_llnesll_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ ret i64 %conv1
+}
+
+define i64 @test_llnesll_sext_z(i64 %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_llnesll_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ ret i64 %conv1
+}
+
+define void @test_llnesll_store(i64 %a, i64 %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_llnesll_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llnesll_sext_store(i64 %a, i64 %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_llnesll_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llnesll_z_store(i64 %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_llnesll_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llnesll_sext_z_store(i64 %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_llnesll_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesllneull.ll b/test/CodeGen/PowerPC/testComparesllneull.ll
new file mode 100644
index 000000000000..7309d5899068
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesllneull.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define i64 @test_llneull(i64 %a, i64 %b) { ; returns 1 when %a != %b, else 0
+; CHECK-LABEL: test_llneull:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ ret i64 %conv1
+}
+
+define i64 @test_llneull_sext(i64 %a, i64 %b) { ; returns -1 when %a != %b, else 0
+; CHECK-LABEL: test_llneull_sext:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ ret i64 %conv1
+}
+
+define i64 @test_llneull_z(i64 %a) { ; returns 1 when %a != 0, else 0
+; CHECK-LABEL: test_llneull_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addic r4, r3, -1
+; CHECK-NEXT: subfe r3, r4, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ ret i64 %conv1
+}
+
+define i64 @test_llneull_sext_z(i64 %a) { ; returns -1 when %a != 0, else 0
+; CHECK-LABEL: test_llneull_sext_z:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ ret i64 %conv1
+}
+
+define void @test_llneull_store(i64 %a, i64 %b) { ; stores 1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_llneull_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llneull_sext_store(i64 %a, i64 %b) { ; stores -1 to @glob when %a != %b, else 0
+; CHECK-LABEL: test_llneull_sext_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT: xor r3, r3, r4
+; CHECK-NEXT: ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r12)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, %b ; i1 flag: %a differs from %b
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llneull_z_store(i64 %a) { ; stores 1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_llneull_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: addic r5, r3, -1
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r5, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = zext i1 %cmp to i64 ; zero-extend the flag: true becomes 1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
+
+define void @test_llneull_sext_z_store(i64 %a) { ; stores -1 to @glob when %a != 0, else 0
+; CHECK-LABEL: test_llneull_sext_z_store:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT: subfic r3, r3, 0
+; CHECK-NEXT: ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT: subfe r3, r3, r3
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %cmp = icmp ne i64 %a, 0 ; i1 flag: %a is non-zero
+ %conv1 = sext i1 %cmp to i64 ; sign-extend the flag: true becomes -1
+ store i64 %conv1, i64* @glob, align 8 ; write the widened flag to the i64 global
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_int_ext.ll b/test/CodeGen/PowerPC/vec_int_ext.ll
new file mode 100644
index 000000000000..9e1218c423b7
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <4 x i32> @vextsb2w(<16 x i8> %a) { ; result lanes 0..3 = sign-extended elements 0, 4, 8, 12 of %a
+; PWR9-LABEL: vextsb2w:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsb2w 2, 2
+; PWR9-NEXT: blr
+entry:
+ %vecext = extractelement <16 x i8> %a, i32 0 ; element 0 feeds lane 0
+ %conv = sext i8 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 4 ; element 4 feeds lane 1
+ %conv2 = sext i8 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <16 x i8> %a, i32 8 ; element 8 feeds lane 2
+ %conv5 = sext i8 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <16 x i8> %a, i32 12 ; element 12 feeds lane 3
+ %conv8 = sext i8 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2d(<16 x i8> %a) { ; result lanes 0..1 = sign-extended elements 0 and 8 of %a
+; PWR9-LABEL: vextsb2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsb2d 2, 2
+; PWR9-NEXT: blr
+entry:
+ %vecext = extractelement <16 x i8> %a, i32 0 ; element 0 feeds lane 0
+ %conv = sext i8 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 8 ; element 8 feeds lane 1
+ %conv2 = sext i8 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2w(<8 x i16> %a) { ; result lanes 0..3 = sign-extended elements 0, 2, 4, 6 of %a
+; PWR9-LABEL: vextsh2w:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsh2w 2, 2
+; PWR9-NEXT: blr
+entry:
+ %vecext = extractelement <8 x i16> %a, i32 0 ; element 0 feeds lane 0
+ %conv = sext i16 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 2 ; element 2 feeds lane 1
+ %conv2 = sext i16 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <8 x i16> %a, i32 4 ; element 4 feeds lane 2
+ %conv5 = sext i16 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <8 x i16> %a, i32 6 ; element 6 feeds lane 3
+ %conv8 = sext i16 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2d(<8 x i16> %a) { ; result lanes 0..1 = sign-extended elements 0 and 4 of %a
+; PWR9-LABEL: vextsh2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsh2d 2, 2
+; PWR9-NEXT: blr
+entry:
+ %vecext = extractelement <8 x i16> %a, i32 0 ; element 0 feeds lane 0
+ %conv = sext i16 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 4 ; element 4 feeds lane 1
+ %conv2 = sext i16 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2d(<4 x i32> %a) { ; result lanes 0..1 = sign-extended elements 0 and 2 of %a
+; PWR9-LABEL: vextsw2d:
+; PWR9: # BB#0: # %entry
+; PWR9-NEXT: vextsw2d 2, 2
+; PWR9-NEXT: blr
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 0 ; element 0 feeds lane 0
+ %conv = sext i32 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 2 ; element 2 feeds lane 1
+ %conv2 = sext i32 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index b1deb2c5f567..e04d10c9d64a 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \
-; RUN: grep "asm-printer" | grep 35
+; RUN: grep "asm-printer" | grep 33
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
deleted file mode 100644
index 6c60aed67a7b..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -mtriple i386 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-; RUN: llc -mtriple x86_64 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-
-define void @test_void_return() {
-; CHECK-LABEL: name: test_void_return
-; CHECK: alignment: 4
-; CHECK-NEXT: exposesReturnsTwice: false
-; CHECK-NEXT: legalized: false
-; CHECK-NEXT: regBankSelected: false
-; CHECK-NEXT: selected: false
-; CHECK-NEXT: tracksRegLiveness: true
-; CHECK-NEXT: frameInfo:
-; CHECK-NEXT: isFrameAddressTaken: false
-; CHECK-NEXT: isReturnAddressTaken: false
-; CHECK-NEXT: hasStackMap: false
-; CHECK-NEXT: hasPatchPoint: false
-; CHECK-NEXT: stackSize: 0
-; CHECK-NEXT: offsetAdjustment: 0
-; CHECK-NEXT: maxAlignment: 0
-; CHECK-NEXT: adjustsStack: false
-; CHECK-NEXT: hasCalls: false
-; CHECK-NEXT: hasOpaqueSPAdjustment: false
-; CHECK-NEXT: hasVAStart: false
-; CHECK-NEXT: hasMustTailInVarArgFunc: false
-; CHECK-NEXT: body:
-; CHECK-NEXT: bb.1.entry:
-; CHECK-NEXT: RET 0
-entry:
- ret void
-}
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index 8ea3e4f9d739..00aa7cf84e55 100644
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
@a1_8bit = external global i8
@a7_8bit = external global i8
@@ -11,8 +11,8 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
; ALL-LABEL: name: test_i8_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi
; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi
@@ -26,14 +26,14 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 1, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -77,8 +77,8 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
; ALL-LABEL: name: test_i32_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi
; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi
@@ -92,14 +92,14 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -142,8 +142,8 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
; ALL-LABEL: name: test_i64_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi
; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi
@@ -157,22 +157,22 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
@@ -249,8 +249,8 @@ define float @test_float_args(float %arg1, float %arg2) {
; X64-NEXT: RET 0, implicit %xmm0
; X32: fixedStack:
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -270,8 +270,8 @@ define double @test_double_args(double %arg1, double %arg2) {
; X64-NEXT: RET 0, implicit %xmm0
; X32: fixedStack:
-; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -282,6 +282,38 @@ define double @test_double_args(double %arg1, double %arg2) {
ret double %arg2
}
+define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
+; ALL: name: test_v4i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0
+ ret <4 x i32> %arg2
+}
+
+define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
+; ALL: name: test_v8i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
+; ALL-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>)
+; ALL-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1
+
+ ret <8 x i32> %arg1
+}
+
+define void @test_void_return() {
+; ALL-LABEL: name: test_void_return
+; ALL: bb.1.entry:
+; ALL-NEXT: RET 0
+entry:
+ ret void
+}
+
define i32 * @test_memop_i32(i32 * %p1) {
; ALL-LABEL:name: test_memop_i32
;X64 liveins: %rdi
@@ -290,7 +322,7 @@ define i32 * @test_memop_i32(i32 * %p1) {
;X64-NEXT: RET 0, implicit %rax
;X32: fixedStack:
-;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+;X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
;X32-NEXT: %eax = COPY %0(p0)
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
deleted file mode 100644
index 90a05f5fc225..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
-
-define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
-; X64: name: test_v4i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0
- ret <4 x i32> %arg2
-}
-
-define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
-; X64: name: test_v8i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
-; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>)
-; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
-; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1
-
- ret <8 x i32> %arg1
-}
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
index 0d66a6384107..682d01e66fa0 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
index be62832b008a..effd26e9866d 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
@@ -26,9 +26,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
index d99303c3ba3b..5ae8132156d5 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
@@ -26,9 +26,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
index 24eefd30c2ac..71ea313c4c72 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
@@ -28,9 +28,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -58,9 +58,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
index cc03f3a57f0b..ca238b29c2dd 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
@@ -33,8 +33,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_mul_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,8 +56,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -79,8 +79,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_sub_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -100,8 +100,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -122,8 +122,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
index 278413ad38ef..c94ecc8e9a8d 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
@@ -33,8 +33,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -53,8 +53,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -73,8 +73,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -93,8 +93,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
index a115d1fa3255..b74e03f0fe79 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
@@ -14,11 +14,11 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index 1ea922ee475a..7bcc57aef4ac 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -145,9 +145,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i8
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -172,9 +172,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i16
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -199,9 +199,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -226,9 +226,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -253,14 +253,14 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_mul_gpr
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
-# CHECK: - { id: 3, class: gpr }
-# CHECK: - { id: 4, class: gpr }
-# CHECK: - { id: 5, class: gpr }
-# CHECK: - { id: 6, class: gpr }
-# CHECK: - { id: 7, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK: - { id: 4, class: gpr, preferred-register: '' }
+# CHECK: - { id: 5, class: gpr, preferred-register: '' }
+# CHECK: - { id: 6, class: gpr, preferred-register: '' }
+# CHECK: - { id: 7, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -292,9 +292,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_float
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -319,9 +319,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_double
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -346,9 +346,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_v4i32
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -373,9 +373,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_v4f32
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -399,8 +399,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i8
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -422,8 +422,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i16
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -445,8 +445,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -469,8 +469,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -492,8 +492,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_float
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -515,8 +515,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_double
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -538,8 +538,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_v4i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -561,8 +561,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_store_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -585,8 +585,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_store_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -610,12 +610,12 @@ selected: false
# CHECK-LABEL: name: test_store_float
# CHECK: registers:
-# FAST-NEXT: - { id: 0, class: vecr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# FAST-NEXT: - { id: 2, class: gpr }
+# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' }
-# GREEDY-NEXT: - { id: 0, class: vecr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -647,12 +647,12 @@ selected: false
# CHECK-LABEL: name: test_store_double
# CHECK: registers:
-# FAST-NEXT: - { id: 0, class: vecr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# FAST-NEXT: - { id: 2, class: gpr }
+# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' }
-# GREEDY-NEXT: - { id: 0, class: vecr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -682,10 +682,10 @@ alignment: 4
legalized: true
# CHECK-LABEL: name: constInt_check
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -706,10 +706,10 @@ alignment: 4
legalized: true
# CHECK-LABEL: name: trunc_check
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -729,11 +729,11 @@ name: test_gep
legalized: true
# CHECK-LABEL: name: test_gep
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -757,9 +757,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -782,9 +782,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -807,9 +807,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -832,9 +832,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
index a39702340bc2..4f7b6ec72d52 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
@@ -32,19 +32,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
index 7556c2104124..143fd9422974 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
@@ -30,19 +30,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
index e90be4e996f8..6a0cd32eefd5 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
@@ -31,9 +31,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
index 8710aaa61a21..0b864f417367 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
@@ -13,16 +13,16 @@ alignment: 4
legalized: true
regBankSelected: true
# X32: registers:
-# X32-NEXT: - { id: 0, class: gr32 }
-# X32-NEXT: - { id: 1, class: gr32 }
-# X32-NEXT: - { id: 2, class: gr32 }
-# X32-NEXT: - { id: 3, class: gr32 }
-# X32-NEXT: - { id: 4, class: gpr }
-# X32-NEXT: - { id: 5, class: gr32 }
-# X32-NEXT: - { id: 6, class: gr32 }
-# X32-NEXT: - { id: 7, class: gr32 }
-# X32-NEXT: - { id: 8, class: gr32 }
-# X32-NEXT: - { id: 9, class: gpr }
+# X32-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 4, class: gpr, preferred-register: '' }
+# X32-NEXT: - { id: 5, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 6, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 7, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 8, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 9, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
index 7337ce12c395..78e6bb6913a4 100644
--- a/test/CodeGen/X86/GlobalISel/select-add.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -51,9 +51,9 @@ name: test_add_i64
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -78,9 +78,9 @@ name: test_add_i32
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -106,9 +106,9 @@ legalized: true
regBankSelected: true
selected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr16 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -135,9 +135,9 @@ legalized: true
regBankSelected: true
selected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -165,12 +165,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -200,12 +200,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -235,12 +235,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -271,12 +271,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-cmp.mir b/test/CodeGen/X86/GlobalISel/select-cmp.mir
index a92c388c1db9..64c8cb6b823a 100644
--- a/test/CodeGen/X86/GlobalISel/select-cmp.mir
+++ b/test/CodeGen/X86/GlobalISel/select-cmp.mir
@@ -87,11 +87,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr8 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -124,11 +124,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr16 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -161,11 +161,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr64 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -198,11 +198,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -235,11 +235,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -272,11 +272,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -309,11 +309,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -346,11 +346,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -383,11 +383,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -420,11 +420,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -457,11 +457,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -494,11 +494,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -531,11 +531,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir
index 162de0264435..7902a5084ce6 100644
--- a/test/CodeGen/X86/GlobalISel/select-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-constant.mir
@@ -33,7 +33,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i8
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -52,7 +52,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i16
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -71,7 +71,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -90,7 +90,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -110,7 +110,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64_u32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -129,7 +129,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
index d1a3abfd0f93..edb467b2bf90 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
@@ -25,10 +25,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr64 }
-# ALL-NEXT: - { id: 3, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -57,8 +57,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -83,8 +83,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir
index dccc20e57100..b52f1f6fa621 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext.mir
@@ -35,9 +35,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -63,8 +63,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -89,8 +89,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -141,8 +141,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-gep.mir b/test/CodeGen/X86/GlobalISel/select-gep.mir
index c8a4dc80cb2c..61c766230035 100644
--- a/test/CodeGen/X86/GlobalISel/select-gep.mir
+++ b/test/CodeGen/X86/GlobalISel/select-gep.mir
@@ -14,9 +14,9 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: test_gep_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr64_nosp }
-# CHECK-NEXT: - { id: 2, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr64_nosp, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir
index 7a77864091d3..47fe6ef672ba 100644
--- a/test/CodeGen/X86/GlobalISel/select-inc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-inc.mir
@@ -13,10 +13,10 @@ name: test_add_i8
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# INC-NEXT: - { id: 1, class: gpr }
-# ADD-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# INC-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# ADD-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
index 539520c0b8f5..9128f19b1d24 100644
--- a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
@@ -29,7 +29,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -47,7 +47,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1_optsize
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -65,7 +65,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1b
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -83,7 +83,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1_optsizeb
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
index 8e6a2771db6e..09f414b48a8a 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
@@ -49,9 +49,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -79,9 +79,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -139,10 +139,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -176,10 +176,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -213,10 +213,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -250,9 +250,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -280,10 +280,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
index b57c9b0cca98..6d03d7525d20 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
@@ -91,8 +91,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr8 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr8, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr16 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr16, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -139,8 +139,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr32 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr32, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -163,8 +163,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -187,8 +187,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr32 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr32, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -211,9 +211,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: fr32 }
-# AVX512ALL: - { id: 1, class: fr32x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: fr32, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: fr32x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -238,8 +238,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -262,9 +262,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: fr64 }
-# AVX512ALL: - { id: 1, class: fr64x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: fr64, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: fr64x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -289,8 +289,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr32 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr32, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %edi
@@ -315,8 +315,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -341,9 +341,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: fr32x }
-# ALL: - { id: 1, class: gr64 }
-# ALL: - { id: 2, class: gr32 }
+# ALL: - { id: 0, class: fr32x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
+# ALL: - { id: 2, class: gr32, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
- { id: 2, class: gpr }
@@ -371,9 +371,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: fr32 }
-# AVX512ALL: - { id: 0, class: fr32x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: fr32, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: fr32x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -400,9 +400,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: fr64x }
-# ALL: - { id: 1, class: gr64 }
-# ALL: - { id: 2, class: gr64 }
+# ALL: - { id: 0, class: fr64x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
+# ALL: - { id: 2, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
- { id: 2, class: gpr }
@@ -430,9 +430,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: fr64 }
-# AVX512ALL: - { id: 0, class: fr64x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: fr64, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: fr64x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -460,8 +460,8 @@ legalized: true
regBankSelected: true
selected: false
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1)
@@ -483,8 +483,8 @@ legalized: true
regBankSelected: true
selected: false
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1)
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
index ce3f6b91dcf6..08844657e2a2 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
@@ -32,9 +32,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: vr128 }
-# AVX512ALL: - { id: 1, class: vr128x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -60,9 +60,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: vr128 }
-# AVX512ALL: - { id: 1, class: vr128x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -88,9 +88,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: vr128 }
-# AVX512ALL: - { id: 0, class: vr128x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -118,9 +118,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: vr128 }
-# AVX512ALL: - { id: 0, class: vr128x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
index b9a7e4a8cc4a..ff371ad9989f 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
@@ -33,12 +33,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: gr64 }
-# AVX512ALL-NEXT: - { id: 1, class: vr256x }
+# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -73,12 +73,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: gr64 }
-# AVX512ALL-NEXT: - { id: 1, class: vr256x }
+# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -113,12 +113,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: vr256x }
-# AVX512ALL-NEXT: - { id: 1, class: gr64 }
+# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
@@ -153,12 +153,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: vr256x }
-# AVX512ALL-NEXT: - { id: 1, class: gr64 }
+# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
index 87978a684d4c..131902d81a00 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
@@ -28,8 +28,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: gr64 }
-# AVX512F-NEXT: - { id: 1, class: vr512 }
+# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -54,8 +54,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: gr64 }
-# AVX512F-NEXT: - { id: 1, class: vr512 }
+# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -80,8 +80,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: vr512 }
-# AVX512F-NEXT: - { id: 1, class: gr64 }
+# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
@@ -106,8 +106,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: vr512 }
-# AVX512F-NEXT: - { id: 1, class: gr64 }
+# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
index 34a77acc2d1e..453557c08469 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr16 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -55,9 +55,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -86,9 +86,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
index 5f8ab1e4f189..d3651ccd1ab9 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
@@ -95,9 +95,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -121,9 +121,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -147,9 +147,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -173,9 +173,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -199,9 +199,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -225,9 +225,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -251,9 +251,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -277,9 +277,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256 }
-# CHECK-NEXT: - { id: 1, class: vr256 }
-# CHECK-NEXT: - { id: 2, class: vr256 }
+# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -303,9 +303,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -329,9 +329,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256 }
-# CHECK-NEXT: - { id: 1, class: vr256 }
-# CHECK-NEXT: - { id: 2, class: vr256 }
+# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -355,9 +355,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -381,9 +381,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -407,9 +407,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -433,9 +433,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -459,9 +459,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
index d60d4155e29d..f77879d93009 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
@@ -32,19 +32,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
index fbc44997b4a2..d6bde7fbb691 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
@@ -30,19 +30,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
index dcd05f056949..828a243b2656 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
@@ -31,9 +31,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir
index d4db6eec6d80..4768a2d93222 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub.mir
@@ -40,9 +40,9 @@ name: test_sub_i64
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -66,9 +66,9 @@ name: test_sub_i32
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -94,12 +94,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -128,12 +128,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -161,12 +161,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -196,12 +196,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir
index 9b90543d6559..4df585628ddc 100644
--- a/test/CodeGen/X86/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir
@@ -38,8 +38,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -64,8 +64,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -90,8 +90,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -116,8 +116,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -142,8 +142,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -168,8 +168,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index 874e3e379d8e..5e375cc42e01 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -5,12 +5,12 @@
; CHECK-LABEL: Pass Arguments:
; CHECK-NEXT: Target Library Information
; CHECK-NEXT: Target Pass Configuration
+; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Create Garbage Collector Module Metadata
-; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
index ed2df277480e..e79d2e69e347 100644
--- a/test/CodeGen/X86/atom-fixup-lea3.ll
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-; CHECK: addl ([[reg:%[a-z]+]])
-; CHECK-NEXT: addl $4, [[reg]]
+; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: movl
+; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: incl
; Test for the FixupLEAs pre-emit pass.
; An LEA should NOT be substituted for the ADD instruction
@@ -20,7 +22,7 @@
; return sum;
;}
-define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 {
entry:
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.end
@@ -35,6 +37,9 @@ for.body: ; preds = %for.body, %for.body
%j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
%inc1 = add nsw i32 %j.09, 1
%arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
+ store i32 %0, i32* %m, align 4
+ store i32 %sum.010, i32* %m, align 4
+ store i32 %0, i32* %m, align 4
%1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %1
store i32 %add, i32* %m, align 4
diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll
index bb05481e313d..47e95fe31bdf 100644
--- a/test/CodeGen/X86/avx-schedule.ll
+++ b/test/CodeGen/X86/avx-schedule.ll
@@ -910,14 +910,14 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double
;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
%2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -941,14 +941,14 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
%2 = load <8 x float>, <8 x float> *%a2, align 32
@@ -972,14 +972,14 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
%2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -1003,14 +1003,14 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
%2 = load <8 x float>, <8 x float> *%a2, align 32
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 1914b5134bee..91d1f64c6706 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -14,7 +14,7 @@ entry:
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -26,7 +26,7 @@ entry:
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -41,7 +41,7 @@ entry:
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -58,20 +58,20 @@ entry:
;
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
-; CHECK: ## BB#0: ## %for_exit499
+; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## implicit-def: %YMM0
+; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne LBB4_2
-; CHECK-NEXT: ## BB#1: ## %load.i1247
+; CHECK-NEXT: jne .LBB4_2
+; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
+; CHECK-NEXT: subq $1312, %rsp # imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
+; CHECK-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
@@ -99,7 +99,7 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -112,7 +112,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -123,7 +123,7 @@ entry:
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: retq
@@ -134,7 +134,7 @@ entry:
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %ptr
@@ -144,7 +144,7 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %ptr
@@ -154,7 +154,7 @@ define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %ptr
@@ -164,7 +164,7 @@ define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %ptr
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 8f6afa8785d0..140299f5495d 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
@@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; NOVL-NEXT: retq
diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll
index 8c3a6790ffa6..c73d7654045e 100644
--- a/test/CodeGen/X86/build-vector-128.ll
+++ b/test/CodeGen/X86/build-vector-128.ll
@@ -41,9 +41,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
;
; SSE2-64-LABEL: test_buildvector_v4f32:
; SSE2-64: # BB#0:
-; SSE2-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT: retq
;
; SSE41-64-LABEL: test_buildvector_v4f32:
@@ -74,13 +74,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE2-32-LABEL: test_buildvector_v2i64:
; SSE2-32: # BB#0:
-; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-32-NEXT: retl
;
; SSE-64-LABEL: test_buildvector_v2i64:
@@ -126,12 +122,12 @@ define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
; SSE2-64-LABEL: test_buildvector_v4i32:
; SSE2-64: # BB#0:
; SSE2-64-NEXT: movd %ecx, %xmm0
-; SSE2-64-NEXT: movd %esi, %xmm1
+; SSE2-64-NEXT: movd %edx, %xmm1
; SSE2-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-64-NEXT: movd %edx, %xmm2
+; SSE2-64-NEXT: movd %esi, %xmm2
; SSE2-64-NEXT: movd %edi, %xmm0
; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-64-NEXT: retq
;
; SSE41-64-LABEL: test_buildvector_v4i32:
@@ -170,34 +166,34 @@ define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: test_buildvector_v8i16:
; SSE2-64: # BB#0:
-; SSE2-64-NEXT: movd %ecx, %xmm0
+; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT: movd %r9d, %xmm1
-; SSE2-64-NEXT: movd %esi, %xmm2
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-64-NEXT: movd %r9d, %xmm0
+; SSE2-64-NEXT: movd %r8d, %xmm2
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-64-NEXT: movd %ecx, %xmm0
; SSE2-64-NEXT: movd %edx, %xmm1
-; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-64-NEXT: movd %r8d, %xmm3
+; SSE2-64-NEXT: movd %esi, %xmm3
; SSE2-64-NEXT: movd %edi, %xmm0
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: test_buildvector_v8i16:
@@ -267,31 +263,31 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: test_buildvector_v16i8:
@@ -299,34 +295,34 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT: movd %ecx, %xmm0
-; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT: movd %r9d, %xmm1
+; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-64-NEXT: movd %esi, %xmm2
-; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT: movd %edx, %xmm3
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-64-NEXT: movd %r8d, %xmm1
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT: movd %r9d, %xmm0
+; SSE2-64-NEXT: movd %r8d, %xmm2
+; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-64-NEXT: movd %ecx, %xmm0
+; SSE2-64-NEXT: movd %edx, %xmm1
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT: movd %esi, %xmm4
; SSE2-64-NEXT: movd %edi, %xmm0
-; SSE2-64-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: test_buildvector_v16i8:
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
index 730376acdc93..cd5abc1373b9 100644
--- a/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -75,9 +75,9 @@ entry:
define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
; SSE2-LABEL: test_buildvector_v4f32_register:
; SSE2: # BB#0:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_register:
@@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_load:
@@ -126,10 +126,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
; SSE2-LABEL: test_buildvector_v4f32_partial_load:
; SSE2: # BB#0:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_partial_load:
@@ -150,12 +150,12 @@ define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32
; SSE2-LABEL: test_buildvector_v4i32_register:
; SSE2: # BB#0:
; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: movd %edx, %xmm2
+; SSE2-NEXT: movd %esi, %xmm2
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4i32_register:
@@ -178,7 +178,7 @@ define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: movd %esi, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4i32_partial:
@@ -228,21 +228,21 @@ define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %
define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
; SSE2-LABEL: test_buildvector_v8i16_register:
; SSE2: # BB#0:
-; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: movd %r9d, %xmm1
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movd %r9d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd %r8d, %xmm3
+; SSE2-NEXT: movd %esi, %xmm3
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v8i16_register:
@@ -333,34 +333,34 @@ define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: movd %r9d, %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: movd %edx, %xmm3
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT: movd %r8d, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %r9d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %esi, %xmm4
; SSE2-NEXT: movd %edi, %xmm0
-; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v16i8_register:
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 1218b68b1be4..f6d816ec8919 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -159,28 +159,7 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE: # BB#0:
-; SSE-NEXT: pextrw $1, %xmm0, %eax
-; SSE-NEXT: pextrw $2, %xmm0, %r9d
-; SSE-NEXT: pextrw $3, %xmm0, %edx
-; SSE-NEXT: pextrw $4, %xmm0, %r8d
-; SSE-NEXT: pextrw $5, %xmm0, %edi
-; SSE-NEXT: pextrw $6, %xmm0, %esi
-; SSE-NEXT: pextrw $7, %xmm0, %ecx
-; SSE-NEXT: movd %ecx, %xmm1
-; SSE-NEXT: movd %edx, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT: movd %edi, %xmm1
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %esi, %xmm1
-; SSE-NEXT: movd %r9d, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT: movd %r8d, %xmm1
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16a:
@@ -225,61 +204,9 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE: # BB#0:
-; SSE-NEXT: pushq %rbp
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r12
-; SSE-NEXT: pushq %rbx
-; SSE-NEXT: pextrw $1, %xmm0, %edi
-; SSE-NEXT: pextrw $2, %xmm0, %eax
-; SSE-NEXT: pextrw $3, %xmm0, %ecx
-; SSE-NEXT: pextrw $4, %xmm0, %edx
-; SSE-NEXT: pextrw $5, %xmm0, %esi
-; SSE-NEXT: pextrw $6, %xmm0, %ebx
-; SSE-NEXT: pextrw $7, %xmm0, %ebp
-; SSE-NEXT: pextrw $1, %xmm1, %r10d
-; SSE-NEXT: pextrw $2, %xmm1, %r9d
-; SSE-NEXT: pextrw $3, %xmm1, %r14d
-; SSE-NEXT: pextrw $4, %xmm1, %r8d
-; SSE-NEXT: pextrw $5, %xmm1, %r15d
-; SSE-NEXT: pextrw $6, %xmm1, %r11d
-; SSE-NEXT: pextrw $7, %xmm1, %r12d
-; SSE-NEXT: movd %ebp, %xmm2
-; SSE-NEXT: movd %ecx, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %esi, %xmm2
-; SSE-NEXT: movd %edi, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %ebx, %xmm2
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %edx, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: movd %r12d, %xmm3
-; SSE-NEXT: movd %r14d, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %r15d, %xmm3
-; SSE-NEXT: movd %r10d, %xmm5
-; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; SSE-NEXT: movd %r11d, %xmm3
-; SSE-NEXT: movd %r9d, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %r8d, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
-; SSE-NEXT: popq %rbp
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16a:
@@ -364,10 +291,9 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -375,31 +301,32 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
@@ -486,10 +413,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -497,31 +423,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -531,10 +458,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -542,31 +468,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm5
-; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm6
-; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll
index 4140721bd5f3..33d001cdc216 100644
--- a/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xfloat:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xfloat:
@@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xdouble:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xdouble:
@@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi8:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi8:
@@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi16:
@@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi32:
@@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xi64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xi64:
@@ -957,8 +981,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xfloat:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xfloat:
@@ -1003,8 +1035,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xdouble:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vmovapd 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xdouble:
@@ -1049,8 +1089,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt64xi8:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt64xi8:
@@ -1101,8 +1149,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi16:
@@ -1153,8 +1209,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi32:
@@ -1199,8 +1263,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi64:
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 85b2b41fa191..068480873c23 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,16 +1,10 @@
; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
-; ATOM: foo
-; ATOM: addl
-; ATOM: addl
-; ATOM: leal
; CHECK: foo
-; CHECK: addl
-; CHECK: addl
-; CHECK: addl
+; CHECK: incl
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll
index 4596b83f7bc2..fd023d018031 100644
--- a/test/CodeGen/X86/haddsub-2.ll
+++ b/test/CodeGen/X86/haddsub-2.ll
@@ -142,12 +142,12 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edi
; SSE3-NEXT: addl %eax, %edi
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %esi, %xmm2
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: movd %ecx, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phadd_d_test1:
@@ -196,16 +196,16 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %esi
; SSE3-NEXT: addl %eax, %esi
; SSE3-NEXT: movd %esi, %xmm0
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: movd %xmm1, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movd %ecx, %xmm1
; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phadd_d_test2:
@@ -258,12 +258,12 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edi
; SSE3-NEXT: subl %edi, %esi
; SSE3-NEXT: movd %esi, %xmm0
-; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: movd %edx, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %edx, %xmm2
+; SSE3-NEXT: movd %ecx, %xmm2
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phsub_d_test1:
@@ -312,16 +312,16 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %esi
; SSE3-NEXT: subl %esi, %edx
; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: movd %xmm1, %edx
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: subl %esi, %edx
+; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %edx
-; SSE3-NEXT: subl %edx, %eax
-; SSE3-NEXT: movd %eax, %xmm1
; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phsub_d_test2:
@@ -518,19 +518,19 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %r9d
; SSE3-NEXT: addl %edx, %r9d
-; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: movd %xmm1, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r10d
-; SSE3-NEXT: addl %esi, %r10d
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %edi
-; SSE3-NEXT: addl %esi, %edi
+; SSE3-NEXT: addl %edx, %edi
; SSE3-NEXT: movd %xmm2, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: movd %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
@@ -541,24 +541,24 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: addl %eax, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: movd %xmm0, %r11d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT: movd %xmm0, %esi
-; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: addl %r11d, %eax
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %r9d, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %r10d, %xmm2
+; SSE3-NEXT: movd %r9d, %xmm2
; SSE3-NEXT: movd %r8d, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: movd %r11d, %xmm1
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: avx2_vphadd_d_test:
@@ -658,83 +658,83 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: addl %eax, %ecx
; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSE3-NEXT: pextrw $3, %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSE3-NEXT: addl %eax, %ecx
+; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSE3-NEXT: pextrw $5, %xmm0, %r10d
-; SSE3-NEXT: addl %eax, %r10d
+; SSE3-NEXT: pextrw $5, %xmm0, %r11d
+; SSE3-NEXT: addl %eax, %r11d
; SSE3-NEXT: pextrw $6, %xmm0, %eax
-; SSE3-NEXT: pextrw $7, %xmm0, %r13d
-; SSE3-NEXT: addl %eax, %r13d
+; SSE3-NEXT: pextrw $7, %xmm0, %r15d
+; SSE3-NEXT: addl %eax, %r15d
; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: pextrw $1, %xmm1, %r14d
-; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $1, %xmm1, %r13d
+; SSE3-NEXT: addl %eax, %r13d
; SSE3-NEXT: pextrw $2, %xmm1, %eax
-; SSE3-NEXT: pextrw $3, %xmm1, %ebp
-; SSE3-NEXT: addl %eax, %ebp
-; SSE3-NEXT: pextrw $4, %xmm1, %eax
-; SSE3-NEXT: pextrw $5, %xmm1, %ebx
+; SSE3-NEXT: pextrw $3, %xmm1, %ebx
; SSE3-NEXT: addl %eax, %ebx
+; SSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSE3-NEXT: pextrw $5, %xmm1, %r8d
+; SSE3-NEXT: addl %eax, %r8d
; SSE3-NEXT: pextrw $6, %xmm1, %eax
-; SSE3-NEXT: pextrw $7, %xmm1, %edx
-; SSE3-NEXT: addl %eax, %edx
+; SSE3-NEXT: pextrw $7, %xmm1, %esi
+; SSE3-NEXT: addl %eax, %esi
; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: pextrw $1, %xmm2, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $1, %xmm2, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pextrw $2, %xmm2, %eax
-; SSE3-NEXT: pextrw $3, %xmm2, %r12d
-; SSE3-NEXT: addl %eax, %r12d
+; SSE3-NEXT: pextrw $3, %xmm2, %r14d
+; SSE3-NEXT: addl %eax, %r14d
; SSE3-NEXT: pextrw $4, %xmm2, %eax
-; SSE3-NEXT: pextrw $5, %xmm2, %r15d
-; SSE3-NEXT: addl %eax, %r15d
+; SSE3-NEXT: pextrw $5, %xmm2, %r12d
+; SSE3-NEXT: addl %eax, %r12d
; SSE3-NEXT: pextrw $6, %xmm2, %eax
-; SSE3-NEXT: pextrw $7, %xmm2, %r8d
-; SSE3-NEXT: addl %eax, %r8d
-; SSE3-NEXT: movd %xmm3, %eax
-; SSE3-NEXT: pextrw $1, %xmm3, %r9d
+; SSE3-NEXT: pextrw $7, %xmm2, %r9d
; SSE3-NEXT: addl %eax, %r9d
-; SSE3-NEXT: pextrw $2, %xmm3, %eax
-; SSE3-NEXT: pextrw $3, %xmm3, %esi
-; SSE3-NEXT: addl %eax, %esi
-; SSE3-NEXT: pextrw $4, %xmm3, %eax
-; SSE3-NEXT: pextrw $5, %xmm3, %edi
-; SSE3-NEXT: addl %eax, %edi
-; SSE3-NEXT: pextrw $6, %xmm3, %ecx
+; SSE3-NEXT: movd %xmm3, %eax
+; SSE3-NEXT: pextrw $1, %xmm3, %ebp
+; SSE3-NEXT: addl %eax, %ebp
+; SSE3-NEXT: pextrw $2, %xmm3, %edx
+; SSE3-NEXT: pextrw $3, %xmm3, %edi
+; SSE3-NEXT: addl %edx, %edi
+; SSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSE3-NEXT: pextrw $5, %xmm3, %ecx
+; SSE3-NEXT: addl %edx, %ecx
+; SSE3-NEXT: pextrw $6, %xmm3, %edx
; SSE3-NEXT: pextrw $7, %xmm3, %eax
-; SSE3-NEXT: addl %ecx, %eax
-; SSE3-NEXT: movd %edx, %xmm8
-; SSE3-NEXT: movd %r13d, %xmm3
-; SSE3-NEXT: movd %ebp, %xmm9
-; SSE3-NEXT: movd %r11d, %xmm4
-; SSE3-NEXT: movd %ebx, %xmm10
-; SSE3-NEXT: movd %r10d, %xmm7
-; SSE3-NEXT: movd %r14d, %xmm11
+; SSE3-NEXT: addl %edx, %eax
+; SSE3-NEXT: movd %esi, %xmm8
+; SSE3-NEXT: movd %r8d, %xmm3
+; SSE3-NEXT: movd %ebx, %xmm9
+; SSE3-NEXT: movd %r13d, %xmm4
+; SSE3-NEXT: movd %r15d, %xmm10
+; SSE3-NEXT: movd %r11d, %xmm7
+; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm11 # 4-byte Folded Reload
+; SSE3-NEXT: # xmm11 = mem[0],zero,zero,zero
; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE3-NEXT: movd %eax, %xmm12
-; SSE3-NEXT: movd %r8d, %xmm6
-; SSE3-NEXT: movd %esi, %xmm13
-; SSE3-NEXT: movd %r12d, %xmm5
-; SSE3-NEXT: movd %edi, %xmm14
-; SSE3-NEXT: movd %r15d, %xmm2
-; SSE3-NEXT: movd %r9d, %xmm15
-; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
-; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero
+; SSE3-NEXT: movd %ecx, %xmm6
+; SSE3-NEXT: movd %edi, %xmm13
+; SSE3-NEXT: movd %ebp, %xmm5
+; SSE3-NEXT: movd %r9d, %xmm14
+; SSE3-NEXT: movd %r12d, %xmm2
+; SSE3-NEXT: movd %r14d, %xmm15
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx
; SSE3-NEXT: popq %r12
; SSE3-NEXT: popq %r13
@@ -858,12 +858,12 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
; SSE-NEXT: movd %xmm0, %edi
; SSE-NEXT: subl %edi, %esi
; SSE-NEXT: movd %esi, %xmm0
-; SSE-NEXT: movd %ecx, %xmm1
+; SSE-NEXT: movd %edx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: movd %edx, %xmm2
+; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_1:
@@ -919,11 +919,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: subss %xmm4, %xmm3
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE-NEXT: subss %xmm3, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE-NEXT: subss %xmm4, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_2:
@@ -1162,19 +1162,19 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %r9d
; SSE3-NEXT: addl %edx, %r9d
-; SSE3-NEXT: movd %xmm2, %esi
+; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r10d
-; SSE3-NEXT: addl %esi, %r10d
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %edi
-; SSE3-NEXT: addl %esi, %edi
+; SSE3-NEXT: addl %edx, %edi
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: movd %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
@@ -1185,24 +1185,24 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: addl %eax, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: movd %xmm0, %r11d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT: movd %xmm0, %esi
-; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: addl %r11d, %eax
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %r9d, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %r10d, %xmm2
+; SSE3-NEXT: movd %r9d, %xmm2
; SSE3-NEXT: movd %r8d, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: movd %r11d, %xmm1
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: avx2_hadd_d:
@@ -1293,15 +1293,14 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: .Lcfi23:
; SSE3-NEXT: .cfi_offset %rbp, -16
; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: pextrw $1, %xmm0, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $1, %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSE3-NEXT: pextrw $3, %xmm0, %r15d
-; SSE3-NEXT: addl %eax, %r15d
+; SSE3-NEXT: pextrw $3, %xmm0, %r11d
+; SSE3-NEXT: addl %eax, %r11d
; SSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSE3-NEXT: pextrw $5, %xmm0, %r14d
-; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $5, %xmm0, %r12d
+; SSE3-NEXT: addl %eax, %r12d
; SSE3-NEXT: pextrw $6, %xmm0, %eax
; SSE3-NEXT: pextrw $7, %xmm0, %r13d
; SSE3-NEXT: addl %eax, %r13d
@@ -1310,70 +1309,71 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: addl %eax, %ecx
; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $2, %xmm1, %eax
-; SSE3-NEXT: pextrw $3, %xmm1, %r11d
-; SSE3-NEXT: addl %eax, %r11d
-; SSE3-NEXT: pextrw $4, %xmm1, %eax
-; SSE3-NEXT: pextrw $5, %xmm1, %r10d
-; SSE3-NEXT: addl %eax, %r10d
-; SSE3-NEXT: pextrw $6, %xmm1, %eax
-; SSE3-NEXT: pextrw $7, %xmm1, %r12d
-; SSE3-NEXT: addl %eax, %r12d
-; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: pextrw $1, %xmm2, %ebx
-; SSE3-NEXT: addl %eax, %ebx
-; SSE3-NEXT: pextrw $2, %xmm2, %eax
-; SSE3-NEXT: pextrw $3, %xmm2, %ecx
+; SSE3-NEXT: pextrw $3, %xmm1, %ecx
; SSE3-NEXT: addl %eax, %ecx
+; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSE3-NEXT: pextrw $5, %xmm1, %r14d
+; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $6, %xmm1, %esi
+; SSE3-NEXT: pextrw $7, %xmm1, %r15d
+; SSE3-NEXT: addl %esi, %r15d
+; SSE3-NEXT: movd %xmm2, %esi
+; SSE3-NEXT: pextrw $1, %xmm2, %ebp
+; SSE3-NEXT: addl %esi, %ebp
+; SSE3-NEXT: pextrw $2, %xmm2, %esi
+; SSE3-NEXT: pextrw $3, %xmm2, %edi
+; SSE3-NEXT: addl %esi, %edi
; SSE3-NEXT: pextrw $4, %xmm2, %esi
-; SSE3-NEXT: pextrw $5, %xmm2, %r8d
-; SSE3-NEXT: addl %esi, %r8d
+; SSE3-NEXT: pextrw $5, %xmm2, %eax
+; SSE3-NEXT: addl %esi, %eax
; SSE3-NEXT: pextrw $6, %xmm2, %esi
-; SSE3-NEXT: pextrw $7, %xmm2, %edx
-; SSE3-NEXT: addl %esi, %edx
-; SSE3-NEXT: movd %xmm3, %edi
+; SSE3-NEXT: pextrw $7, %xmm2, %ecx
+; SSE3-NEXT: addl %esi, %ecx
+; SSE3-NEXT: movd %xmm3, %ebx
; SSE3-NEXT: pextrw $1, %xmm3, %r9d
-; SSE3-NEXT: addl %edi, %r9d
-; SSE3-NEXT: pextrw $2, %xmm3, %ebp
-; SSE3-NEXT: pextrw $3, %xmm3, %edi
-; SSE3-NEXT: addl %ebp, %edi
-; SSE3-NEXT: pextrw $4, %xmm3, %eax
-; SSE3-NEXT: pextrw $5, %xmm3, %ebp
-; SSE3-NEXT: addl %eax, %ebp
-; SSE3-NEXT: pextrw $6, %xmm3, %esi
-; SSE3-NEXT: pextrw $7, %xmm3, %eax
-; SSE3-NEXT: addl %esi, %eax
-; SSE3-NEXT: movd %edx, %xmm8
-; SSE3-NEXT: movd %r13d, %xmm3
-; SSE3-NEXT: movd %ecx, %xmm9
-; SSE3-NEXT: movd %r15d, %xmm4
-; SSE3-NEXT: movd %r8d, %xmm10
-; SSE3-NEXT: movd %r14d, %xmm7
-; SSE3-NEXT: movd %ebx, %xmm11
-; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
-; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE3-NEXT: movd %eax, %xmm12
-; SSE3-NEXT: movd %r12d, %xmm6
-; SSE3-NEXT: movd %edi, %xmm13
-; SSE3-NEXT: movd %r11d, %xmm5
-; SSE3-NEXT: movd %ebp, %xmm14
-; SSE3-NEXT: movd %r10d, %xmm2
-; SSE3-NEXT: movd %r9d, %xmm15
+; SSE3-NEXT: addl %ebx, %r9d
+; SSE3-NEXT: pextrw $2, %xmm3, %edx
+; SSE3-NEXT: pextrw $3, %xmm3, %ebx
+; SSE3-NEXT: addl %edx, %ebx
+; SSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSE3-NEXT: pextrw $5, %xmm3, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pextrw $6, %xmm3, %r8d
+; SSE3-NEXT: pextrw $7, %xmm3, %edx
+; SSE3-NEXT: addl %r8d, %edx
+; SSE3-NEXT: movd %ecx, %xmm8
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: movd %edi, %xmm9
+; SSE3-NEXT: movd %ebp, %xmm4
+; SSE3-NEXT: movd %r13d, %xmm10
+; SSE3-NEXT: movd %r12d, %xmm7
+; SSE3-NEXT: movd %r11d, %xmm11
+; SSE3-NEXT: movd %r10d, %xmm0
+; SSE3-NEXT: movd %edx, %xmm12
+; SSE3-NEXT: movd %esi, %xmm6
+; SSE3-NEXT: movd %ebx, %xmm13
+; SSE3-NEXT: movd %r9d, %xmm5
+; SSE3-NEXT: movd %r15d, %xmm14
+; SSE3-NEXT: movd %r14d, %xmm2
+; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm15 # 4-byte Folded Reload
+; SSE3-NEXT: # xmm15 = mem[0],zero,zero,zero
; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx
; SSE3-NEXT: popq %r12
; SSE3-NEXT: popq %r13
diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
index 6d79d4de5206..091d1a22dbcd 100644
--- a/test/CodeGen/X86/haddsub-undef.ll
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -171,9 +171,8 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test8_undef:
diff --git a/test/CodeGen/X86/hoist-spill.ll b/test/CodeGen/X86/hoist-spill.ll
index afabf96b12a3..03f558fc3ae2 100644
--- a/test/CodeGen/X86/hoist-spill.ll
+++ b/test/CodeGen/X86/hoist-spill.ll
@@ -3,10 +3,8 @@
; Check no spills to the same stack slot after hoisting.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 786534b00d39..56f4161147b4 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -4,16 +4,19 @@
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
-; STATIC: movl $-64, [[ECX:%e..]]
+; STATIC: movl $-64, [[EAX:%e..]]
-; STATIC: movl [[EAX:%e..]], _state+76([[ECX]])
-; STATIC: addl $16, [[ECX]]
+; STATIC: movl %{{.+}}, _state+76([[EAX]])
+; STATIC: addl $16, [[EAX]]
; STATIC: jne
-; In PIC mode the symbol can't be folded, so the change-compare-stride
-; trick applies.
+; The same for PIC mode.
-; PIC: cmpl $64
+; PIC: movl $-64, [[EAX:%e..]]
+
+; PIC: movl %{{.+}}, 76(%{{.+}},[[EAX]])
+; PIC: addl $16, [[EAX]]
+; PIC: jne
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll
index af86df510016..7c2bb822c967 100644
--- a/test/CodeGen/X86/madd.ll
+++ b/test/CodeGen/X86/madd.ll
@@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB0_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi), %xmm2
-; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
; SSE2-NEXT: pmaddwd %xmm2, %xmm3
; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: addq $8, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB0_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm0, %xmm1
@@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB0_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vmovdqu (%rsi), %xmm2
-; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: addq $8, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB0_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB0_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vmovdqu (%rsi), %xmm2
-; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: addq $8, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB0_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -118,12 +118,13 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB1_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi), %xmm2
-; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pmulhuw %xmm2, %xmm4
; SSE2-NEXT: pmullw %xmm2, %xmm3
@@ -132,9 +133,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: addq $8, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB1_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm1, %xmm0
@@ -149,6 +149,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB1_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
@@ -156,9 +157,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: addq $8, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB1_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -174,6 +174,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB1_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
@@ -181,9 +182,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1
; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: addq $8, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB1_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -231,6 +231,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -263,9 +264,8 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: paddd %xmm4, %xmm2
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-16, %rax
+; SSE2-NEXT: addq $16, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB2_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm3, %xmm0
@@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB2_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2
-; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX2-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX2-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-16, %rax
+; AVX2-NEXT: addq $16, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB2_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB2_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2
-; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-16, %rax
+; AVX512-NEXT: addq $16, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB2_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 8c0a4d4f1752..61aa05a5270b 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: count_up
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up(double* %d, i64 %n) nounwind {
@@ -38,7 +38,7 @@ return:
; CHECK-LABEL: count_down
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
; CHECK-LABEL: count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -106,7 +106,7 @@ return:
; CHECK-LABEL: count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down_signed(double* %d, i64 %n) nounwind {
@@ -141,7 +141,7 @@ return:
; CHECK-LABEL: another_count_up
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up(double* %d, i64 %n) nounwind {
@@ -174,7 +174,7 @@ return:
; CHECK-LABEL: another_count_down
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8,
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down(double* %d, i64 %n) nounwind {
@@ -207,7 +207,7 @@ return:
; CHECK-LABEL: another_count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up_signed(double* %d, i64 %n) nounwind {
@@ -242,7 +242,7 @@ return:
; CHECK-LABEL: another_count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: decq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down_signed(double* %d, i64 %n) nounwind {
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index ce1bb3b06ce5..4e2475b1c67d 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -10,9 +10,28 @@
declare i32 @memcmp(i8*, i8*, i64)
-define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length2(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length2:
; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $2
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length2:
+; X64: # BB#0:
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzwl (%ecx), %ecx
@@ -20,7 +39,7 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length2:
+; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
@@ -31,8 +50,8 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_const:
+define i1 @length2_eq_const(i8* %X) nounwind {
+; X32-LABEL: length2_eq_const:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl (%eax), %eax
@@ -40,7 +59,7 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length2_const:
+; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
@@ -51,8 +70,8 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_nobuiltin_attr:
+define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq_nobuiltin_attr:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $2
@@ -64,7 +83,7 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length2_nobuiltin_attr:
+; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
@@ -78,9 +97,74 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length3(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $3
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length3:
+; X64: # BB#0:
+; X64-NEXT: movl $3, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $3
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length3_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $3, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length4:
; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length4:
+; X64: # BB#0:
+; X64-NEXT: movl $4, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length4_eq:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %ecx
@@ -88,7 +172,7 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length4:
+; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
@@ -99,15 +183,15 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length4_const:
+define i1 @length4_eq_const(i8* %X) nounwind {
+; X32-LABEL: length4_eq_const:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length4_const:
+; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
@@ -117,7 +201,53 @@ define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length5(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $5
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length5:
+; X64: # BB#0:
+; X64-NEXT: movl $5, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $5
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length5_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $5, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length8:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -126,11 +256,30 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length8:
+; X64: # BB#0:
+; X64-NEXT: movl $8, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length8_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $8
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length8:
+; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
@@ -141,8 +290,8 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length8_const:
+define i1 @length8_eq_const(i8* %X) nounwind {
+; X32-LABEL: length8_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $8
@@ -154,7 +303,7 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length8_const:
+; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
@@ -165,7 +314,55 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length16(i8* %x, i8* %y) nounwind {
+define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length12_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $12, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length12:
+; X64: # BB#0:
+; X64-NEXT: movl $12, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length16:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -174,11 +371,30 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length16:
+; X64: # BB#0:
+; X64-NEXT: movl $16, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length16_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $16
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length16:
+; SSE2-LABEL: length16_eq:
; SSE2: # BB#0:
; SSE2-NEXT: movdqu (%rsi), %xmm0
; SSE2-NEXT: movdqu (%rdi), %xmm1
@@ -188,7 +404,7 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length16:
+; AVX2-LABEL: length16_eq:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
@@ -201,8 +417,8 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length16_const:
+define i1 @length16_eq_const(i8* %X) nounwind {
+; X32-LABEL: length16_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $16
@@ -214,7 +430,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length16_const:
+; SSE2-LABEL: length16_eq_const:
; SSE2: # BB#0:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
@@ -223,7 +439,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length16_const:
+; AVX2-LABEL: length16_eq_const:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
@@ -236,7 +452,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length32(i8* %x, i8* %y) nounwind {
+define i32 @length32(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length32:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -245,11 +461,32 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length32:
+; X64: # BB#0:
+; X64-NEXT: movl $32, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length32_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $32
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length32:
+; SSE2-LABEL: length32_eq:
; SSE2: # BB#0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl $32, %edx
@@ -259,7 +496,7 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
; SSE2-NEXT: popq %rcx
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length32:
+; AVX2-LABEL: length32_eq:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
@@ -273,8 +510,8 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length32_const:
+define i1 @length32_eq_const(i8* %X) nounwind {
+; X32-LABEL: length32_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $32
@@ -286,7 +523,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length32_const:
+; SSE2-LABEL: length32_eq_const:
; SSE2: # BB#0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl $.L.str, %esi
@@ -297,7 +534,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
; SSE2-NEXT: popq %rcx
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length32_const:
+; AVX2-LABEL: length32_eq_const:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
@@ -311,7 +548,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length64(i8* %x, i8* %y) nounwind {
+define i32 @length64(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length64:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -320,11 +557,30 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length64:
+; X64: # BB#0:
+; X64-NEXT: movl $64, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length64_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $64
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length64:
+; X64-LABEL: length64_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $64, %edx
@@ -338,8 +594,8 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length64_const:
+define i1 @length64_eq_const(i8* %X) nounwind {
+; X32-LABEL: length64_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $64
@@ -351,7 +607,7 @@ define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length64_const:
+; X64-LABEL: length64_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 71417694b0d4..1d5829407b71 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -269,10 +269,8 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
; SSE2-LABEL: merge_4f32_f32_012u:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_012u:
@@ -290,11 +288,11 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_012u:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_012u:
@@ -320,10 +318,8 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
; SSE2-LABEL: merge_4f32_f32_019u:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_019u:
@@ -341,11 +337,11 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_019u:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_019u:
@@ -1037,13 +1033,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_2345_volatile:
; SSE2: # BB#0:
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_2345_volatile:
@@ -1065,13 +1059,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile:
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index e3e2737cf3e6..7b39bfe1c484 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $11, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_11:
; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,2), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 11
@@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $13, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_13:
; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 13
@@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) {
define i16 @test_mul_by_14(i16 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $14, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_14:
; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 14
@@ -337,14 +348,19 @@ define i16 @test_mul_by_18(i16 %x) {
define i16 @test_mul_by_19(i16 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $19, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_19:
; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: shll $2, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 19
@@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $21, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_21:
; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 21
@@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) {
define i16 @test_mul_by_22(i16 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $22, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_22:
; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 22
@@ -408,14 +432,19 @@ define i16 @test_mul_by_22(i16 %x) {
define i16 @test_mul_by_23(i16 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $23, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_23:
; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: shll $3, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 23
@@ -465,14 +494,19 @@ define i16 @test_mul_by_25(i16 %x) {
define i16 @test_mul_by_26(i16 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $26, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_26:
; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 26
@@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) {
define i16 @test_mul_by_28(i16 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $28, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_28:
; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 28
@@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $29, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_29:
; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 29
@@ -536,14 +582,20 @@ define i16 @test_mul_by_29(i16 %x) {
define i16 @test_mul_by_30(i16 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $30, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_30:
; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 30
@@ -587,3 +639,30 @@ define i16 @test_mul_by_32(i16 %x) {
%mul = mul nsw i16 %x, 32
ret i16 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i16 @test_mul_spec(i16 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_spec:
+; X64: # BB#0:
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; X64-NEXT: leal 2(%rdi,%rdi,4), %eax
+; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, 9
+ %add = add nsw i16 %mul, 42
+ %mul2 = mul nsw i16 %x, 5
+ %add2 = add nsw i16 %mul2, 2
+ %mul3 = mul nsw i16 %add, %add2
+ ret i16 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 76e46e1f1b09..d545b477e102 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1,6 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
define i32 @test_mul_by_1(i32 %x) {
; X86-LABEL: test_mul_by_1:
@@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 1
ret i32 %mul
}
@@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 2
ret i32 %mul
}
@@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) {
; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 3
ret i32 %mul
}
@@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 4
ret i32 %mul
}
@@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) {
; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 5
ret i32 %mul
}
@@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 6
ret i32 %mul
}
@@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 7
ret i32 %mul
}
@@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 8
ret i32 %mul
}
@@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) {
; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 9
ret i32 %mul
}
@@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 10
ret i32 %mul
}
@@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) {
define i32 @test_mul_by_11(i32 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 11
ret i32 %mul
}
@@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 12
ret i32 %mul
}
@@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) {
define i32 @test_mul_by_13(i32 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 13
ret i32 %mul
}
@@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) {
define i32 @test_mul_by_14(i32 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 14
ret i32 %mul
}
@@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 15
ret i32 %mul
}
@@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shll $4, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 16
ret i32 %mul
}
@@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) {
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: leal (%rax,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 17
ret i32 %mul
}
@@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) {
; X86-NEXT: leal (%eax,%eax,8), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 18
ret i32 %mul
}
@@ -298,13 +934,52 @@ define i32 @test_mul_by_18(i32 %x) {
define i32 @test_mul_by_19(i32 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 19
ret i32 %mul
}
@@ -317,12 +992,46 @@ define i32 @test_mul_by_20(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 20
ret i32 %mul
}
@@ -330,13 +1039,49 @@ define i32 @test_mul_by_20(i32 %x) {
define i32 @test_mul_by_21(i32 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 21
ret i32 %mul
}
@@ -344,13 +1089,52 @@ define i32 @test_mul_by_21(i32 %x) {
define i32 @test_mul_by_22(i32 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 22
ret i32 %mul
}
@@ -358,13 +1142,52 @@ define i32 @test_mul_by_22(i32 %x) {
define i32 @test_mul_by_23(i32 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 23
ret i32 %mul
}
@@ -377,12 +1200,46 @@ define i32 @test_mul_by_24(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $3, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 24
ret i32 %mul
}
@@ -395,12 +1252,46 @@ define i32 @test_mul_by_25(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 25
ret i32 %mul
}
@@ -408,13 +1299,52 @@ define i32 @test_mul_by_25(i32 %x) {
define i32 @test_mul_by_26(i32 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 26
ret i32 %mul
}
@@ -427,12 +1357,46 @@ define i32 @test_mul_by_27(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 27
ret i32 %mul
}
@@ -440,13 +1404,52 @@ define i32 @test_mul_by_27(i32 %x) {
define i32 @test_mul_by_28(i32 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 28
ret i32 %mul
}
@@ -454,13 +1457,55 @@ define i32 @test_mul_by_28(i32 %x) {
define i32 @test_mul_by_29(i32 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 29
ret i32 %mul
}
@@ -468,13 +1513,53 @@ define i32 @test_mul_by_29(i32 %x) {
define i32 @test_mul_by_30(i32 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 30
ret i32 %mul
}
@@ -488,12 +1573,46 @@ define i32 @test_mul_by_31(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $5, %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 31
ret i32 %mul
}
@@ -505,11 +1624,124 @@ define i32 @test_mul_by_32(i32 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shll $5, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 32
ret i32 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i32 @test_mul_spec(i32 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NOOPT-NEXT: imull %ecx, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 9
+ %add = add nsw i32 %mul, 42
+ %mul2 = mul nsw i32 %x, 5
+ %add2 = add nsw i32 %mul2, 2
+ %mul3 = mul nsw i32 %add, %add2
+ ret i32 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 8579179a8231..ea841c761c7b 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -1,18 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
-define i64 @test_mul_by_1(i64 %x) {
+define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-LABEL: test_mul_by_1:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 1
ret i64 %mul
}
@@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $1, %eax, %edx
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 2
ret i64 %mul
}
@@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $3, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 3
ret i64 %mul
}
@@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $2, %eax, %edx
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 4
ret i64 %mul
}
@@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $5, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 5
ret i64 %mul
}
@@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $6, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 6
ret i64 %mul
}
@@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $7, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 7
ret i64 %mul
}
@@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $3, %eax, %edx
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 8
ret i64 %mul
}
@@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $9, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 9
ret i64 %mul
}
@@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $10, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 10
ret i64 %mul
}
@@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) {
define i64 @test_mul_by_11(i64 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %ecx
; X86-NEXT: movl $11, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imulq $11, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $11, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 11
ret i64 %mul
}
@@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $12, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 12
ret i64 %mul
}
@@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) {
define i64 @test_mul_by_13(i64 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $13, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imulq $13, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $13, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 13
ret i64 %mul
}
@@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) {
define i64 @test_mul_by_14(i64 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $14, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imulq $14, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $14, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 14
ret i64 %mul
}
@@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $15, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 15
ret i64 %mul
}
@@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shlq $4, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $4, %eax, %edx
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 16
ret i64 %mul
}
@@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $4, %rax
-; X64-NEXT: leaq (%rax,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $17, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 17
ret i64 %mul
}
@@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $18, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 18
ret i64 %mul
}
@@ -329,16 +963,56 @@ define i64 @test_mul_by_18(i64 %x) {
define i64 @test_mul_by_19(i64 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $19, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imulq $19, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $19, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 19
ret i64 %mul
}
@@ -353,11 +1027,46 @@ define i64 @test_mul_by_20(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $20, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 20
ret i64 %mul
}
@@ -365,16 +1074,53 @@ define i64 @test_mul_by_20(i64 %x) {
define i64 @test_mul_by_21(i64 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $21, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imulq $21, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $21, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 21
ret i64 %mul
}
@@ -382,16 +1128,56 @@ define i64 @test_mul_by_21(i64 %x) {
define i64 @test_mul_by_22(i64 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $22, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imulq $22, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $22, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 22
ret i64 %mul
}
@@ -399,16 +1185,56 @@ define i64 @test_mul_by_22(i64 %x) {
define i64 @test_mul_by_23(i64 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $23, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imulq $23, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $23, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 23
ret i64 %mul
}
@@ -423,11 +1249,46 @@ define i64 @test_mul_by_24(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,8), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: shlq $3, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $24, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 24
ret i64 %mul
}
@@ -443,11 +1304,46 @@ define i64 @test_mul_by_25(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $25, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 25
ret i64 %mul
}
@@ -455,16 +1351,56 @@ define i64 @test_mul_by_25(i64 %x) {
define i64 @test_mul_by_26(i64 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $26, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imulq $26, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $26, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 26
ret i64 %mul
}
@@ -480,11 +1416,46 @@ define i64 @test_mul_by_27(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $27, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 27
ret i64 %mul
}
@@ -492,16 +1463,56 @@ define i64 @test_mul_by_27(i64 %x) {
define i64 @test_mul_by_28(i64 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $28, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imulq $28, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $28, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 28
ret i64 %mul
}
@@ -509,16 +1520,59 @@ define i64 @test_mul_by_28(i64 %x) {
define i64 @test_mul_by_29(i64 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imulq $29, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $29, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 29
ret i64 %mul
}
@@ -526,16 +1580,59 @@ define i64 @test_mul_by_29(i64 %x) {
define i64 @test_mul_by_30(i64 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $30, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imulq $30, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $30, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 30
ret i64 %mul
}
@@ -552,12 +1649,49 @@ define i64 @test_mul_by_31(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $31, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 31
ret i64 %mul
}
@@ -571,11 +1705,168 @@ define i64 @test_mul_by_32(i64 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shlq $5, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $5, %eax, %edx
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 32
ret i64 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i64 @test_mul_spec(i64 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl $9, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NEXT: addl $42, %esi
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: movl $5, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: leal (%edi,%edi,4), %edi
+; X86-NEXT: addl $2, %ecx
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: pushl %ebx
+; X86-NOOPT-NEXT: pushl %edi
+; X86-NOOPT-NEXT: pushl %esi
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOOPT-NEXT: movl $9, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %esi
+; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NOOPT-NEXT: addl $42, %esi
+; X86-NOOPT-NEXT: adcl %edx, %ebx
+; X86-NOOPT-NEXT: movl $5, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %ecx
+; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi
+; X86-NOOPT-NEXT: addl $2, %ecx
+; X86-NOOPT-NEXT: adcl %edx, %edi
+; X86-NOOPT-NEXT: movl %esi, %eax
+; X86-NOOPT-NEXT: mull %ecx
+; X86-NOOPT-NEXT: imull %esi, %edi
+; X86-NOOPT-NEXT: addl %edi, %edx
+; X86-NOOPT-NEXT: imull %ebx, %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: popl %esi
+; X86-NOOPT-NEXT: popl %edi
+; X86-NOOPT-NEXT: popl %ebx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 9
+ %add = add nsw i64 %mul, 42
+ %mul2 = mul nsw i64 %x, 5
+ %add2 = add nsw i64 %mul2, 2
+ %mul3 = mul nsw i64 %add, %add2
+ ret i64 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll
new file mode 100644
index 000000000000..65d80a699e24
--- /dev/null
+++ b/test/CodeGen/X86/mul-constant-result.ll
@@ -0,0 +1,1291 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @mult(i32, i32) local_unnamed_addr #0 {
+; X86-LABEL: mult:
+; X86: # BB#0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: jg .LBB0_2
+; X86-NEXT: # BB#1:
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: je .LBB0_4
+; X86-NEXT: # BB#3:
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: .LBB0_4:
+; X86-NEXT: decl %ecx
+; X86-NEXT: cmpl $31, %ecx
+; X86-NEXT: ja .LBB0_39
+; X86-NEXT: # BB#5:
+; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4)
+; X86-NEXT: .LBB0_6:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_39:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB0_40:
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_7:
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_8:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_9:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_10:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_11:
+; X86-NEXT: leal (,%eax,8), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_13:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_14:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_15:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_16:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_17:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_18:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_19:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: jmp .LBB0_20
+; X86-NEXT: .LBB0_21:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_22:
+; X86-NEXT: shll $4, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_23:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $4, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_24:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_25:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_26:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_27:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_28:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: .LBB0_20:
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_29:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_30:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_31:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_32:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_33:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_34:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_35:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_36:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_37:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: .LBB0_12:
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_38:
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: mult:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: cmpl $1, %esi
+; X64-HSW-NEXT: movl $1, %ecx
+; X64-HSW-NEXT: movl %esi, %eax
+; X64-HSW-NEXT: cmovgl %ecx, %eax
+; X64-HSW-NEXT: testl %esi, %esi
+; X64-HSW-NEXT: cmovel %ecx, %eax
+; X64-HSW-NEXT: addl $-1, %edi
+; X64-HSW-NEXT: cmpl $31, %edi
+; X64-HSW-NEXT: ja .LBB0_36
+; X64-HSW-NEXT: # BB#1:
+; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8)
+; X64-HSW-NEXT: .LBB0_2:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_36:
+; X64-HSW-NEXT: xorl %eax, %eax
+; X64-HSW-NEXT: .LBB0_37:
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_3:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_4:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_5:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_6:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_7:
+; X64-HSW-NEXT: leal (,%rax,8), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_9:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_10:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_11:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_12:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_13:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_14:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_15:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_16
+; X64-HSW-NEXT: .LBB0_18:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_19:
+; X64-HSW-NEXT: shll $4, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_20:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $4, %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_21:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_22:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: shll $2, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_23:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_24:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_25:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: .LBB0_16:
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_26:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: shll $3, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_27:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_28:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_29:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_30:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_31:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_32:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_33:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_34:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: .LBB0_8:
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_35:
+; X64-HSW-NEXT: shll $5, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+ %3 = icmp eq i32 %1, 0
+ %4 = icmp sgt i32 %1, 1
+ %5 = or i1 %3, %4
+ %6 = select i1 %5, i32 1, i32 %1
+ switch i32 %0, label %69 [
+ i32 1, label %70
+ i32 2, label %7
+ i32 3, label %9
+ i32 4, label %11
+ i32 5, label %13
+ i32 6, label %15
+ i32 7, label %17
+ i32 8, label %19
+ i32 9, label %21
+ i32 10, label %23
+ i32 11, label %25
+ i32 12, label %27
+ i32 13, label %29
+ i32 14, label %31
+ i32 15, label %33
+ i32 16, label %35
+ i32 17, label %37
+ i32 18, label %39
+ i32 19, label %41
+ i32 20, label %43
+ i32 21, label %45
+ i32 22, label %47
+ i32 23, label %49
+ i32 24, label %51
+ i32 25, label %53
+ i32 26, label %55
+ i32 27, label %57
+ i32 28, label %59
+ i32 29, label %61
+ i32 30, label %63
+ i32 31, label %65
+ i32 32, label %67
+ ]
+
+; <label>:7: ; preds = %2
+ %8 = shl nsw i32 %6, 1
+ br label %70
+
+; <label>:9: ; preds = %2
+ %10 = mul nsw i32 %6, 3
+ br label %70
+
+; <label>:11: ; preds = %2
+ %12 = shl nsw i32 %6, 2
+ br label %70
+
+; <label>:13: ; preds = %2
+ %14 = mul nsw i32 %6, 5
+ br label %70
+
+; <label>:15: ; preds = %2
+ %16 = mul nsw i32 %6, 6
+ br label %70
+
+; <label>:17: ; preds = %2
+ %18 = mul nsw i32 %6, 7
+ br label %70
+
+; <label>:19: ; preds = %2
+ %20 = shl nsw i32 %6, 3
+ br label %70
+
+; <label>:21: ; preds = %2
+ %22 = mul nsw i32 %6, 9
+ br label %70
+
+; <label>:23: ; preds = %2
+ %24 = mul nsw i32 %6, 10
+ br label %70
+
+; <label>:25: ; preds = %2
+ %26 = mul nsw i32 %6, 11
+ br label %70
+
+; <label>:27: ; preds = %2
+ %28 = mul nsw i32 %6, 12
+ br label %70
+
+; <label>:29: ; preds = %2
+ %30 = mul nsw i32 %6, 13
+ br label %70
+
+; <label>:31: ; preds = %2
+ %32 = mul nsw i32 %6, 14
+ br label %70
+
+; <label>:33: ; preds = %2
+ %34 = mul nsw i32 %6, 15
+ br label %70
+
+; <label>:35: ; preds = %2
+ %36 = shl nsw i32 %6, 4
+ br label %70
+
+; <label>:37: ; preds = %2
+ %38 = mul nsw i32 %6, 17
+ br label %70
+
+; <label>:39: ; preds = %2
+ %40 = mul nsw i32 %6, 18
+ br label %70
+
+; <label>:41: ; preds = %2
+ %42 = mul nsw i32 %6, 19
+ br label %70
+
+; <label>:43: ; preds = %2
+ %44 = mul nsw i32 %6, 20
+ br label %70
+
+; <label>:45: ; preds = %2
+ %46 = mul nsw i32 %6, 21
+ br label %70
+
+; <label>:47: ; preds = %2
+ %48 = mul nsw i32 %6, 22
+ br label %70
+
+; <label>:49: ; preds = %2
+ %50 = mul nsw i32 %6, 23
+ br label %70
+
+; <label>:51: ; preds = %2
+ %52 = mul nsw i32 %6, 24
+ br label %70
+
+; <label>:53: ; preds = %2
+ %54 = mul nsw i32 %6, 25
+ br label %70
+
+; <label>:55: ; preds = %2
+ %56 = mul nsw i32 %6, 26
+ br label %70
+
+; <label>:57: ; preds = %2
+ %58 = mul nsw i32 %6, 27
+ br label %70
+
+; <label>:59: ; preds = %2
+ %60 = mul nsw i32 %6, 28
+ br label %70
+
+; <label>:61: ; preds = %2
+ %62 = mul nsw i32 %6, 29
+ br label %70
+
+; <label>:63: ; preds = %2
+ %64 = mul nsw i32 %6, 30
+ br label %70
+
+; <label>:65: ; preds = %2
+ %66 = mul nsw i32 %6, 31
+ br label %70
+
+; <label>:67: ; preds = %2
+ %68 = shl nsw i32 %6, 5
+ br label %70
+
+; <label>:69: ; preds = %2
+ br label %70
+
+; <label>:70: ; preds = %2, %69, %67, %65, %63, %61, %59, %57, %55, %53, %51, %49, %47, %45, %43, %41, %39, %37, %35, %33, %31, %29, %27, %25, %23, %21, %19, %17, %15, %13, %11, %9, %7
+ %71 = phi i32 [ %8, %7 ], [ %10, %9 ], [ %12, %11 ], [ %14, %13 ], [ %16, %15 ], [ %18, %17 ], [ %20, %19 ], [ %22, %21 ], [ %24, %23 ], [ %26, %25 ], [ %28, %27 ], [ %30, %29 ], [ %32, %31 ], [ %34, %33 ], [ %36, %35 ], [ %38, %37 ], [ %40, %39 ], [ %42, %41 ], [ %44, %43 ], [ %46, %45 ], [ %48, %47 ], [ %50, %49 ], [ %52, %51 ], [ %54, %53 ], [ %56, %55 ], [ %58, %57 ], [ %60, %59 ], [ %62, %61 ], [ %64, %63 ], [ %66, %65 ], [ %68, %67 ], [ 0, %69 ], [ %6, %2 ]
+ ret i32 %71
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @foo() local_unnamed_addr #0 {
+; X86-LABEL: foo:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: .Lcfi2:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %edi
+; X86-NEXT: .Lcfi3:
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi4:
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: .Lcfi5:
+; X86-NEXT: .cfi_offset %esi, -16
+; X86-NEXT: .Lcfi6:
+; X86-NEXT: .cfi_offset %edi, -12
+; X86-NEXT: .Lcfi7:
+; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: pushl $0
+; X86-NEXT: .Lcfi8:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi9:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi10:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl $1, %esi
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi11:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi12:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi13:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $2, %edi
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi14:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi15:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi16:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $3, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi17:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi18:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi19:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $4, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi20:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi21:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi22:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $5, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi23:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi24:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi25:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $6, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi26:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi27:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi28:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $7, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi29:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi30:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi31:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $8, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi32:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi33:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi34:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $9, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi35:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi36:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi37:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $10, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi38:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi39:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi40:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $11, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi41:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi42:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi43:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $12, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi44:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi45:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi46:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $13, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi47:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi48:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi49:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $14, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi50:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi51:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi52:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $15, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi53:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $16
+; X86-NEXT: .Lcfi54:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi55:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $16, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi56:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $17
+; X86-NEXT: .Lcfi57:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi58:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $17, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi59:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $18
+; X86-NEXT: .Lcfi60:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi61:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $18, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi62:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $19
+; X86-NEXT: .Lcfi63:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi64:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $19, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi65:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $20
+; X86-NEXT: .Lcfi66:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi67:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $20, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi68:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $21
+; X86-NEXT: .Lcfi69:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi70:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $21, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi71:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $22
+; X86-NEXT: .Lcfi72:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi73:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $22, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi74:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $23
+; X86-NEXT: .Lcfi75:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi76:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $23, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi77:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $24
+; X86-NEXT: .Lcfi78:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi79:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $24, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi80:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $25
+; X86-NEXT: .Lcfi81:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi82:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $25, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi83:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $26
+; X86-NEXT: .Lcfi84:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi85:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $26, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi86:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $27
+; X86-NEXT: .Lcfi87:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi88:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $27, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi89:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $28
+; X86-NEXT: .Lcfi90:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi91:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $28, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi92:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $29
+; X86-NEXT: .Lcfi93:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi94:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $29, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi95:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $30
+; X86-NEXT: .Lcfi96:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi97:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $30, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi98:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $31
+; X86-NEXT: .Lcfi99:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi100:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: pushl $16
+; X86-NEXT: .Lcfi101:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $32
+; X86-NEXT: .Lcfi102:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi103:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: xorl $32, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: jne .LBB1_2
+; X86-NEXT: # BB#1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB1_2:
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: foo:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: pushq %rbp
+; X64-HSW-NEXT: .Lcfi0:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 16
+; X64-HSW-NEXT: pushq %r15
+; X64-HSW-NEXT: .Lcfi1:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 24
+; X64-HSW-NEXT: pushq %r14
+; X64-HSW-NEXT: .Lcfi2:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 32
+; X64-HSW-NEXT: pushq %r12
+; X64-HSW-NEXT: .Lcfi3:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 40
+; X64-HSW-NEXT: pushq %rbx
+; X64-HSW-NEXT: .Lcfi4:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 48
+; X64-HSW-NEXT: .Lcfi5:
+; X64-HSW-NEXT: .cfi_offset %rbx, -48
+; X64-HSW-NEXT: .Lcfi6:
+; X64-HSW-NEXT: .cfi_offset %r12, -40
+; X64-HSW-NEXT: .Lcfi7:
+; X64-HSW-NEXT: .cfi_offset %r14, -32
+; X64-HSW-NEXT: .Lcfi8:
+; X64-HSW-NEXT: .cfi_offset %r15, -24
+; X64-HSW-NEXT: .Lcfi9:
+; X64-HSW-NEXT: .cfi_offset %rbp, -16
+; X64-HSW-NEXT: xorl %r12d, %r12d
+; X64-HSW-NEXT: movl $1, %edi
+; X64-HSW-NEXT: xorl %esi, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $1, %ebx
+; X64-HSW-NEXT: movl $2, %edi
+; X64-HSW-NEXT: movl $1, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $2, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $3, %edi
+; X64-HSW-NEXT: movl $1, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $3, %r14d
+; X64-HSW-NEXT: movl $4, %edi
+; X64-HSW-NEXT: movl $2, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $4, %ebx
+; X64-HSW-NEXT: orl %r14d, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $5, %edi
+; X64-HSW-NEXT: movl $2, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $5, %r14d
+; X64-HSW-NEXT: movl $6, %edi
+; X64-HSW-NEXT: movl $3, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $6, %ebp
+; X64-HSW-NEXT: orl %r14d, %ebp
+; X64-HSW-NEXT: movl $7, %edi
+; X64-HSW-NEXT: movl $3, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $7, %r14d
+; X64-HSW-NEXT: orl %ebp, %r14d
+; X64-HSW-NEXT: orl %ebx, %r14d
+; X64-HSW-NEXT: movl $8, %edi
+; X64-HSW-NEXT: movl $4, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $8, %ebx
+; X64-HSW-NEXT: movl $9, %edi
+; X64-HSW-NEXT: movl $4, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $9, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $10, %edi
+; X64-HSW-NEXT: movl $5, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $10, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $11, %edi
+; X64-HSW-NEXT: movl $5, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r15d
+; X64-HSW-NEXT: xorl $11, %r15d
+; X64-HSW-NEXT: orl %ebx, %r15d
+; X64-HSW-NEXT: orl %r14d, %r15d
+; X64-HSW-NEXT: movl $12, %edi
+; X64-HSW-NEXT: movl $6, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $12, %ebx
+; X64-HSW-NEXT: movl $13, %edi
+; X64-HSW-NEXT: movl $6, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $13, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $14, %edi
+; X64-HSW-NEXT: movl $7, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $14, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $15, %edi
+; X64-HSW-NEXT: movl $7, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $15, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $16, %edi
+; X64-HSW-NEXT: movl $8, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $16, %r14d
+; X64-HSW-NEXT: orl %ebp, %r14d
+; X64-HSW-NEXT: orl %r15d, %r14d
+; X64-HSW-NEXT: movl $17, %edi
+; X64-HSW-NEXT: movl $8, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $17, %ebp
+; X64-HSW-NEXT: movl $18, %edi
+; X64-HSW-NEXT: movl $9, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $18, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $19, %edi
+; X64-HSW-NEXT: movl $9, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $19, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $20, %edi
+; X64-HSW-NEXT: movl $10, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $20, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $21, %edi
+; X64-HSW-NEXT: movl $10, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $21, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $22, %edi
+; X64-HSW-NEXT: movl $11, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r15d
+; X64-HSW-NEXT: xorl $22, %r15d
+; X64-HSW-NEXT: orl %ebp, %r15d
+; X64-HSW-NEXT: orl %r14d, %r15d
+; X64-HSW-NEXT: movl $23, %edi
+; X64-HSW-NEXT: movl $11, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $23, %ebp
+; X64-HSW-NEXT: movl $24, %edi
+; X64-HSW-NEXT: movl $12, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $24, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $25, %edi
+; X64-HSW-NEXT: movl $12, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $25, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $26, %edi
+; X64-HSW-NEXT: movl $13, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $26, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $27, %edi
+; X64-HSW-NEXT: movl $13, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $27, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $28, %edi
+; X64-HSW-NEXT: movl $14, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $28, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $29, %edi
+; X64-HSW-NEXT: movl $14, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $29, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: orl %r15d, %ebp
+; X64-HSW-NEXT: movl $30, %edi
+; X64-HSW-NEXT: movl $15, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $30, %r14d
+; X64-HSW-NEXT: movl $31, %edi
+; X64-HSW-NEXT: movl $15, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $31, %ebx
+; X64-HSW-NEXT: orl %r14d, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $32, %edi
+; X64-HSW-NEXT: movl $16, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: xorl $32, %eax
+; X64-HSW-NEXT: orl %ebx, %eax
+; X64-HSW-NEXT: movl $-1, %eax
+; X64-HSW-NEXT: cmovel %r12d, %eax
+; X64-HSW-NEXT: popq %rbx
+; X64-HSW-NEXT: popq %r12
+; X64-HSW-NEXT: popq %r14
+; X64-HSW-NEXT: popq %r15
+; X64-HSW-NEXT: popq %rbp
+; X64-HSW-NEXT: retq
+ %1 = tail call i32 @mult(i32 1, i32 0)
+ %2 = icmp ne i32 %1, 1
+ %3 = tail call i32 @mult(i32 2, i32 1)
+ %4 = icmp ne i32 %3, 2
+ %5 = or i1 %2, %4
+ %6 = tail call i32 @mult(i32 3, i32 1)
+ %7 = icmp ne i32 %6, 3
+ %8 = or i1 %5, %7
+ %9 = tail call i32 @mult(i32 4, i32 2)
+ %10 = icmp ne i32 %9, 4
+ %11 = or i1 %8, %10
+ %12 = tail call i32 @mult(i32 5, i32 2)
+ %13 = icmp ne i32 %12, 5
+ %14 = or i1 %11, %13
+ %15 = tail call i32 @mult(i32 6, i32 3)
+ %16 = icmp ne i32 %15, 6
+ %17 = or i1 %14, %16
+ %18 = tail call i32 @mult(i32 7, i32 3)
+ %19 = icmp ne i32 %18, 7
+ %20 = or i1 %17, %19
+ %21 = tail call i32 @mult(i32 8, i32 4)
+ %22 = icmp ne i32 %21, 8
+ %23 = or i1 %20, %22
+ %24 = tail call i32 @mult(i32 9, i32 4)
+ %25 = icmp ne i32 %24, 9
+ %26 = or i1 %23, %25
+ %27 = tail call i32 @mult(i32 10, i32 5)
+ %28 = icmp ne i32 %27, 10
+ %29 = or i1 %26, %28
+ %30 = tail call i32 @mult(i32 11, i32 5)
+ %31 = icmp ne i32 %30, 11
+ %32 = or i1 %29, %31
+ %33 = tail call i32 @mult(i32 12, i32 6)
+ %34 = icmp ne i32 %33, 12
+ %35 = or i1 %32, %34
+ %36 = tail call i32 @mult(i32 13, i32 6)
+ %37 = icmp ne i32 %36, 13
+ %38 = or i1 %35, %37
+ %39 = tail call i32 @mult(i32 14, i32 7)
+ %40 = icmp ne i32 %39, 14
+ %41 = or i1 %38, %40
+ %42 = tail call i32 @mult(i32 15, i32 7)
+ %43 = icmp ne i32 %42, 15
+ %44 = or i1 %41, %43
+ %45 = tail call i32 @mult(i32 16, i32 8)
+ %46 = icmp ne i32 %45, 16
+ %47 = or i1 %44, %46
+ %48 = tail call i32 @mult(i32 17, i32 8)
+ %49 = icmp ne i32 %48, 17
+ %50 = or i1 %47, %49
+ %51 = tail call i32 @mult(i32 18, i32 9)
+ %52 = icmp ne i32 %51, 18
+ %53 = or i1 %50, %52
+ %54 = tail call i32 @mult(i32 19, i32 9)
+ %55 = icmp ne i32 %54, 19
+ %56 = or i1 %53, %55
+ %57 = tail call i32 @mult(i32 20, i32 10)
+ %58 = icmp ne i32 %57, 20
+ %59 = or i1 %56, %58
+ %60 = tail call i32 @mult(i32 21, i32 10)
+ %61 = icmp ne i32 %60, 21
+ %62 = or i1 %59, %61
+ %63 = tail call i32 @mult(i32 22, i32 11)
+ %64 = icmp ne i32 %63, 22
+ %65 = or i1 %62, %64
+ %66 = tail call i32 @mult(i32 23, i32 11)
+ %67 = icmp ne i32 %66, 23
+ %68 = or i1 %65, %67
+ %69 = tail call i32 @mult(i32 24, i32 12)
+ %70 = icmp ne i32 %69, 24
+ %71 = or i1 %68, %70
+ %72 = tail call i32 @mult(i32 25, i32 12)
+ %73 = icmp ne i32 %72, 25
+ %74 = or i1 %71, %73
+ %75 = tail call i32 @mult(i32 26, i32 13)
+ %76 = icmp ne i32 %75, 26
+ %77 = or i1 %74, %76
+ %78 = tail call i32 @mult(i32 27, i32 13)
+ %79 = icmp ne i32 %78, 27
+ %80 = or i1 %77, %79
+ %81 = tail call i32 @mult(i32 28, i32 14)
+ %82 = icmp ne i32 %81, 28
+ %83 = or i1 %80, %82
+ %84 = tail call i32 @mult(i32 29, i32 14)
+ %85 = icmp ne i32 %84, 29
+ %86 = or i1 %83, %85
+ %87 = tail call i32 @mult(i32 30, i32 15)
+ %88 = icmp ne i32 %87, 30
+ %89 = or i1 %86, %88
+ %90 = tail call i32 @mult(i32 31, i32 15)
+ %91 = icmp ne i32 %90, 31
+ %92 = or i1 %89, %91
+ %93 = tail call i32 @mult(i32 32, i32 16)
+ %94 = icmp ne i32 %93, 32
+ %95 = or i1 %92, %94
+ %96 = sext i1 %95 to i32
+ ret i32 %96
+}
+
+attributes #0 = { norecurse nounwind readnone uwtable }
diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll
index eaab26ef9547..3c916fd38c6c 100644
--- a/test/CodeGen/X86/nontemporal-loads.ll
+++ b/test/CodeGen/X86/nontemporal-loads.ll
@@ -168,7 +168,9 @@ define <8 x float> @test_v8f32(<8 x float>* %src) {
;
; AVX1-LABEL: test_v8f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8f32:
@@ -199,7 +201,9 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) {
;
; AVX1-LABEL: test_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
@@ -240,7 +244,9 @@ define <4 x double> @test_v4f64(<4 x double>* %src) {
;
; AVX1-LABEL: test_v4f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4f64:
@@ -271,7 +277,9 @@ define <4 x i64> @test_v4i64(<4 x i64>* %src) {
;
; AVX1-LABEL: test_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
@@ -302,7 +310,9 @@ define <16 x i16> @test_v16i16(<16 x i16>* %src) {
;
; AVX1-LABEL: test_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
@@ -333,7 +343,9 @@ define <32 x i8> @test_v32i8(<32 x i8>* %src) {
;
; AVX1-LABEL: test_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
@@ -370,8 +382,12 @@ define <16 x float> @test_v16f32(<16 x float>* %src) {
;
; AVX1-LABEL: test_v16f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16f32:
@@ -407,8 +423,12 @@ define <16 x i32> @test_v16i32(<16 x i32>* %src) {
;
; AVX1-LABEL: test_v16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i32:
@@ -444,8 +464,12 @@ define <8 x double> @test_v8f64(<8 x double>* %src) {
;
; AVX1-LABEL: test_v8f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8f64:
@@ -481,8 +505,12 @@ define <8 x i64> @test_v8i64(<8 x i64>* %src) {
;
; AVX1-LABEL: test_v8i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i64:
@@ -518,8 +546,12 @@ define <32 x i16> @test_v32i16(<32 x i16>* %src) {
;
; AVX1-LABEL: test_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i16:
@@ -567,8 +599,12 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
;
; AVX1-LABEL: test_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i8:
@@ -601,19 +637,27 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
; Check cases where the load would be folded.
define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
-; SSE-LABEL: test_arg_v4f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: addps %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v4f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1
%2 = fadd <4 x float> %arg, %1
@@ -621,19 +665,27 @@ define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
}
define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
-; SSE-LABEL: test_arg_v4i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vpaddd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v4i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
%2 = add <4 x i32> %arg, %1
@@ -641,19 +693,27 @@ define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
}
define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
-; SSE-LABEL: test_arg_v2f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v2f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: addpd %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v2f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1
%2 = fadd <2 x double> %arg, %1
@@ -661,19 +721,27 @@ define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
}
define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
-; SSE-LABEL: test_arg_v2i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddq %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v2i64:
; AVX: # BB#0:
-; AVX-NEXT: vpaddq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1
%2 = add <2 x i64> %arg, %1
@@ -681,19 +749,27 @@ define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
}
define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
-; SSE-LABEL: test_arg_v8i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddw %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v8i16:
; AVX: # BB#0:
-; AVX-NEXT: vpaddw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v8i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddw (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1
%2 = add <8 x i16> %arg, %1
@@ -701,19 +777,27 @@ define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
}
define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
-; SSE-LABEL: test_arg_v16i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddb %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vpaddb (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddb (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1
%2 = add <16 x i8> %arg, %1
@@ -723,20 +807,38 @@ define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
; And now YMM versions.
define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
-; SSE-LABEL: test_arg_v8f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: addps 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: addps 16(%rdi), %xmm1
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v8f32:
-; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v8f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: addps %xmm3, %xmm0
+; SSE41-NEXT: addps %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v8f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v8f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1
%2 = fadd <8 x float> %arg, %1
@@ -744,51 +846,90 @@ define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
}
define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) {
-; SSE-LABEL: test_arg_v8i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: paddd 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: paddd 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddd %xmm3, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v8i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_arg_v8i32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_arg_v8i32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_arg_v8i32:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_arg_v8i32:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpaddd (%rdi), %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
%2 = add <8 x i32> %arg, %1
ret <8 x i32> %2
}
define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
-; SSE-LABEL: test_arg_v4f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: addpd 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: addpd 16(%rdi), %xmm1
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v4f64:
-; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v4f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: addpd %xmm3, %xmm0
+; SSE41-NEXT: addpd %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v4f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1
%2 = fadd <4 x double> %arg, %1
@@ -796,30 +937,40 @@ define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
}
define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
-; SSE-LABEL: test_arg_v4i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: paddq 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: paddq 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddq %xmm3, %xmm0
+; SSE41-NEXT: paddq %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1
%2 = add <4 x i64> %arg, %1
@@ -827,30 +978,40 @@ define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
}
define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
-; SSE-LABEL: test_arg_v16i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: paddw 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: paddw 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddw %xmm3, %xmm0
+; SSE41-NEXT: paddw %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddw (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1
%2 = add <16 x i16> %arg, %1
@@ -858,30 +1019,40 @@ define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
}
define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
-; SSE-LABEL: test_arg_v32i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: paddb 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: paddb 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddb %xmm3, %xmm0
+; SSE41-NEXT: paddb %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v32i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddb (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1
%2 = add <32 x i8> %arg, %1
@@ -891,23 +1062,50 @@ define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
; And now ZMM versions.
define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
-; SSE-LABEL: test_arg_v16f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: addps 16(%rdi), %xmm1
-; SSE-NEXT: addps 32(%rdi), %xmm2
-; SSE-NEXT: addps 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: addps 16(%rdi), %xmm1
+; SSE2-NEXT: addps 32(%rdi), %xmm2
+; SSE2-NEXT: addps 48(%rdi), %xmm3
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v16f32:
-; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT: vaddps 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v16f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: addps %xmm7, %xmm0
+; SSE41-NEXT: addps %xmm6, %xmm1
+; SSE41-NEXT: addps %xmm5, %xmm2
+; SSE41-NEXT: addps %xmm4, %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v16f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v16f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vaddps %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
%2 = fadd <16 x float> %arg, %1
@@ -915,39 +1113,54 @@ define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
}
define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
-; SSE-LABEL: test_arg_v16i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: paddd 16(%rdi), %xmm1
-; SSE-NEXT: paddd 32(%rdi), %xmm2
-; SSE-NEXT: paddd 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: paddd 16(%rdi), %xmm1
+; SSE2-NEXT: paddd 32(%rdi), %xmm2
+; SSE2-NEXT: paddd 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddd %xmm7, %xmm0
+; SSE41-NEXT: paddd %xmm6, %xmm1
+; SSE41-NEXT: paddd %xmm5, %xmm2
+; SSE41-NEXT: paddd %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddd %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v16i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddd 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
%2 = add <16 x i32> %arg, %1
@@ -955,23 +1168,50 @@ define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
}
define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
-; SSE-LABEL: test_arg_v8f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: addpd 16(%rdi), %xmm1
-; SSE-NEXT: addpd 32(%rdi), %xmm2
-; SSE-NEXT: addpd 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: addpd 16(%rdi), %xmm1
+; SSE2-NEXT: addpd 32(%rdi), %xmm2
+; SSE2-NEXT: addpd 48(%rdi), %xmm3
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v8f64:
-; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT: vaddpd 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v8f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: addpd %xmm7, %xmm0
+; SSE41-NEXT: addpd %xmm6, %xmm1
+; SSE41-NEXT: addpd %xmm5, %xmm2
+; SSE41-NEXT: addpd %xmm4, %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v8f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v8f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vaddpd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
%2 = fadd <8 x double> %arg, %1
@@ -979,39 +1219,54 @@ define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
}
define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
-; SSE-LABEL: test_arg_v8i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: paddq 16(%rdi), %xmm1
-; SSE-NEXT: paddq 32(%rdi), %xmm2
-; SSE-NEXT: paddq 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: paddq 16(%rdi), %xmm1
+; SSE2-NEXT: paddq 32(%rdi), %xmm2
+; SSE2-NEXT: paddq 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddq %xmm7, %xmm0
+; SSE41-NEXT: paddq %xmm6, %xmm1
+; SSE41-NEXT: paddq %xmm5, %xmm2
+; SSE41-NEXT: paddq %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v8i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v8i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddq 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
%2 = add <8 x i64> %arg, %1
@@ -1019,51 +1274,70 @@ define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
}
define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
-; SSE-LABEL: test_arg_v32i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: paddw 16(%rdi), %xmm1
-; SSE-NEXT: paddw 32(%rdi), %xmm2
-; SSE-NEXT: paddw 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v32i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: paddw 16(%rdi), %xmm1
+; SSE2-NEXT: paddw 32(%rdi), %xmm2
+; SSE2-NEXT: paddw 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v32i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddw %xmm7, %xmm0
+; SSE41-NEXT: paddw %xmm6, %xmm1
+; SSE41-NEXT: paddw %xmm5, %xmm2
+; SSE41-NEXT: paddw %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v32i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_arg_v32i16:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_arg_v32i16:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpaddw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_arg_v32i16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
%1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
%2 = add <32 x i16> %arg, %1
@@ -1071,51 +1345,70 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
}
define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
-; SSE-LABEL: test_arg_v64i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: paddb 16(%rdi), %xmm1
-; SSE-NEXT: paddb 32(%rdi), %xmm2
-; SSE-NEXT: paddb 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v64i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: paddb 16(%rdi), %xmm1
+; SSE2-NEXT: paddb 32(%rdi), %xmm2
+; SSE2-NEXT: paddb 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v64i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddb %xmm7, %xmm0
+; SSE41-NEXT: paddb %xmm6, %xmm1
+; SSE41-NEXT: paddb %xmm5, %xmm2
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v64i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_arg_v64i8:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_arg_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpaddb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_arg_v64i8:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
%2 = add <64 x i8> %arg, %1
diff --git a/test/CodeGen/X86/pr32659.ll b/test/CodeGen/X86/pr32659.ll
new file mode 100644
index 000000000000..aafae9c4f6c9
--- /dev/null
+++ b/test/CodeGen/X86/pr32659.ll
@@ -0,0 +1,83 @@
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+@a = external global i32, align 4
+@d = external global i32*, align 4
+@k = external global i32**, align 4
+@j = external global i32***, align 4
+@h = external global i32, align 4
+@c = external global i32, align 4
+@i = external global i32, align 4
+@b = external global i32, align 4
+@f = external global i64, align 8
+@e = external global i64, align 8
+@g = external global i32, align 4
+
+; Function Attrs: norecurse nounwind optsize readnone
+declare i32 @fn1(i32 returned) #0
+
+
+; CHECK-LABEL: fn2
+; CHECK: calll putchar
+; CHECK: addl $1,
+; CHECK: adcl $0,
+; Function Attrs: nounwind optsize
+define void @fn2() #1 {
+entry:
+ %putchar = tail call i32 @putchar(i32 48)
+ %0 = load volatile i32, i32* @h, align 4
+ %1 = load i32, i32* @c, align 4, !tbaa !2
+ %2 = load i32***, i32**** @j, align 4
+ %3 = load i32**, i32*** %2, align 4
+ %4 = load i32*, i32** %3, align 4
+ %5 = load i32, i32* %4, align 4
+ %cmp = icmp sgt i32 %1, %5
+ %conv = zext i1 %cmp to i32
+ %6 = load i32, i32* @i, align 4
+ %cmp1 = icmp sgt i32 %6, %conv
+ %conv2 = zext i1 %cmp1 to i32
+ store i32 %conv2, i32* @b, align 4
+ %cmp3 = icmp sgt i32 %0, %conv2
+ %conv4 = zext i1 %cmp3 to i32
+ %7 = load i32, i32* @a, align 4
+ %or = xor i32 %7, %conv4
+ store i32 %or, i32* @a, align 4
+ %8 = load i32*, i32** @d, align 4
+ %9 = load i32, i32* %8, align 4
+ %conv6 = sext i32 %9 to i64
+ %10 = load i64, i64* @e, align 8
+ %and = and i64 %10, %conv6
+ store i64 %and, i64* @e, align 8
+ %11 = load i32, i32* @g, align 4
+ %dec = add nsw i32 %11, -1
+ store i32 %dec, i32* @g, align 4
+ %12 = load i64, i64* @f, align 8
+ %inc = add nsw i64 %12, 1
+ store i64 %inc, i64* @f, align 8
+ ret void
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @main() #1
+
+; Function Attrs: nounwind
+declare i32 @putchar(i32) #2
+
+attributes #0 = { optsize readnone }
+attributes #1 = { optsize }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{!"clang version 5.0.0 (trunk 300074) (llvm/trunk 300078)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"any pointer", !4, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long long", !4, i64 0}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 7c2937936313..0e8db74fe1bd 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -314,13 +314,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; GENERIC-NEXT: jmp LBB7_6
; GENERIC-NEXT: LBB7_4:
; GENERIC-NEXT: movd %r9d, %xmm1
-; GENERIC-NEXT: movd %ecx, %xmm2
+; GENERIC-NEXT: movd %r8d, %xmm2
; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; GENERIC-NEXT: movd %r8d, %xmm3
+; GENERIC-NEXT: movd %ecx, %xmm3
; GENERIC-NEXT: movd %edx, %xmm1
; GENERIC-NEXT: LBB7_6:
; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm1
; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm0
; GENERIC-NEXT: movq %xmm0, 16(%rsi)
@@ -350,16 +350,16 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; ATOM-NEXT: jmp LBB7_6
; ATOM-NEXT: LBB7_4:
; ATOM-NEXT: movd %r9d, %xmm1
-; ATOM-NEXT: movd %ecx, %xmm2
+; ATOM-NEXT: movd %r8d, %xmm2
; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; ATOM-NEXT: movd %r8d, %xmm3
+; ATOM-NEXT: movd %ecx, %xmm3
; ATOM-NEXT: movd %edx, %xmm1
; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; ATOM-NEXT: LBB7_6:
; ATOM-NEXT: psubd {{.*}}(%rip), %xmm0
; ATOM-NEXT: psubd {{.*}}(%rip), %xmm1
diff --git a/test/CodeGen/X86/selectiondag-dominator.ll b/test/CodeGen/X86/selectiondag-dominator.ll
new file mode 100644
index 000000000000..f289a16f29eb
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-dominator.ll
@@ -0,0 +1,30 @@
+; Make sure we don't crash because we have a stale dominator tree.
+; PR33266
+; REQUIRES: asserts
+; RUN: llc -o /dev/null -verify-dom-info %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global [8 x [8 x [4 x i8]]], align 2
+@global.1 = external global { i8, [3 x i8] }, align 4
+
+define void @patatino() local_unnamed_addr {
+bb:
+ br label %bb1
+
+bb1:
+ br label %bb2
+
+bb2:
+ br i1 icmp ne (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)), label %bb4, label %bb3
+
+bb3:
+ br i1 icmp eq (i64 ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), i64 0), label %bb5, label %bb4
+
+bb4:
+ %tmp = phi i64 [ ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), %bb3 ], [ 7, %bb2 ]
+ ret void
+
+bb5:
+ ret void
+}
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 0b03dffe99b5..e468c69db5dd 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -53,17 +53,17 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -86,18 +86,18 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -121,15 +121,15 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: notl %edx
-; X32-NEXT: notl %ecx
; X32-NEXT: notl %esi
+; X32-NEXT: notl %ecx
; X32-NEXT: notl %eax
; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, (%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -138,7 +138,7 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -165,18 +165,18 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X64-NEXT: notl %esi
; X64-NEXT: notl %edx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %r8d, %edx
; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1277,17 +1277,17 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -1310,18 +1310,18 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1538,16 +1538,16 @@ define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) n
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_ps:
; X64: # BB#0:
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: retq
%res0 = insertelement <4 x float> undef, float %a3, i32 0
@@ -1677,16 +1677,16 @@ define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3)
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_ps:
; X64: # BB#0:
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <4 x float> undef, float %a0, i32 0
%res1 = insertelement <4 x float> %res0, float %a1, i32 1
@@ -2239,17 +2239,17 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -2272,18 +2272,18 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index f4964b5a6f66..c74dec3e21b6 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -87,17 +87,17 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: .LBB1_11: # %entry
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: vselect:
; X64: # BB#0: # %entry
-; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: testl %edx, %edx
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: je .LBB1_1
; X64-NEXT: # BB#2: # %entry
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: testl %edx, %edx
+; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: jne .LBB1_5
; X64-NEXT: .LBB1_4:
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
@@ -111,7 +111,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: jmp .LBB1_11
; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: testl %edx, %edx
+; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: je .LBB1_4
; X64-NEXT: .LBB1_5: # %entry
; X64-NEXT: xorps %xmm2, %xmm2
@@ -126,7 +126,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: .LBB1_11: # %entry
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
entry:
%a1 = icmp eq <4 x i32> %q, zeroinitializer
@@ -252,12 +252,12 @@ define <2 x float> @PR31672() #0 {
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: andl %ecx, %edx
-; X32-NEXT: notl %ecx
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl %eax, %ecx
+; X32-NEXT: notl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: andl %ecx, %edx
@@ -277,7 +277,7 @@ define <2 x float> @PR31672() #0 {
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
@@ -297,48 +297,48 @@ define <2 x float> @PR31672() #0 {
; X64-NEXT: mulps %xmm1, %xmm0
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: movl %r9d, %esi
-; X64-NEXT: andl %edi, %esi
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl %edi, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: notl %ecx
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: andl %eax, %ecx
-; X64-NEXT: orl %esi, %ecx
+; X64-NEXT: andl %edx, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %r8d, %ecx
-; X64-NEXT: andl %r10d, %ecx
-; X64-NEXT: movl %r10d, %esi
-; X64-NEXT: notl %esi
-; X64-NEXT: andl %edx, %esi
-; X64-NEXT: orl %ecx, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: shrq $32, %r9
+; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: andl %edi, %r9d
+; X64-NEXT: andl %edi, %esi
; X64-NEXT: notl %edi
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: andl %edi, %eax
-; X64-NEXT: orl %r9d, %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: shrq $32, %r10
-; X64-NEXT: andl %r10d, %r8d
-; X64-NEXT: notl %r10d
; X64-NEXT: shrq $32, %rdx
-; X64-NEXT: andl %r10d, %edx
-; X64-NEXT: orl %r8d, %edx
+; X64-NEXT: andl %edi, %edx
+; X64-NEXT: orl %esi, %edx
; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %r8d, %eax
+; X64-NEXT: andl %r9d, %eax
+; X64-NEXT: movl %r9d, %ecx
+; X64-NEXT: notl %ecx
+; X64-NEXT: andl %r10d, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrq $32, %r8
+; X64-NEXT: shrq $32, %r9
+; X64-NEXT: andl %r9d, %r8d
+; X64-NEXT: notl %r9d
+; X64-NEXT: shrq $32, %r10
+; X64-NEXT: andl %r9d, %r10d
+; X64-NEXT: orl %r8d, %r10d
+; X64-NEXT: movl %r10d, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
ret <2 x float> %t0
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 20387ccd6b7a..ff5d624e6042 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2076,7 +2076,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2087,8 +2087,8 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2099,7 +2099,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2110,27 +2110,27 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
@@ -2138,20 +2138,20 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
@@ -2161,9 +2161,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
@@ -2206,11 +2206,11 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi16:
@@ -2218,20 +2218,20 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movd %edi, %xmm0
-; X64-NEXT: movd %r8d, %xmm1
+; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %edx, %xmm0
-; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: movd %ecx, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movd %r8d, %xmm0
; X64-NEXT: movd %r9d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %ecx, %xmm3
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: movd %r10d, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
@@ -2254,18 +2254,18 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi32:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
-; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %edx, %xmm2
; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
@@ -2282,11 +2282,11 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi64x:
@@ -2441,10 +2441,9 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi64x:
@@ -2486,7 +2485,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2497,8 +2496,8 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2509,7 +2508,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2520,9 +2519,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi8:
@@ -2534,46 +2533,46 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
@@ -2616,11 +2615,11 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi16:
@@ -2628,20 +2627,20 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movd %ecx, %xmm1
+; X64-NEXT: movd %r10d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %r9d, %xmm0
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %r8d, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT: movd %r10d, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %r8d, %xmm3
+; X64-NEXT: movd %esi, %xmm3
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
@@ -2664,18 +2663,18 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi32:
; X64: # BB#0:
; X64-NEXT: movd %ecx, %xmm0
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: movd %esi, %xmm2
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
@@ -2692,11 +2691,11 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi64x:
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 4d895ea264c5..b5aa26f532ef 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -342,9 +342,8 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1,1,3]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test14:
@@ -375,8 +374,7 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,2,1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
@@ -417,10 +415,10 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test16:
diff --git a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
index 19305d0dad62..383ab21bd404 100644
--- a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X32-LABEL: test_mm_crc32_u8:
; X32: # BB#0:
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32b {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32b %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u8:
@@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X32-LABEL: test_mm_crc32_u16:
; X32: # BB#0:
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32w {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32w %cx, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u16:
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index f937d484ce0d..4165aea8794f 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-; TODO stack_fold_sqrtsd
+define double @stack_fold_sqrtsd(double %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtsd
+ ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.sqrt.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
; TODO stack_fold_sqrtsd_int
-; TODO stack_fold_sqrtss
+
+define float @stack_fold_sqrtss(float %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtss
+ ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.sqrt.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
; TODO stack_fold_sqrtss_int
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
index 5c6f697610a0..3ca94b7b9467 100644
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -453,6 +453,21 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
; TODO stack_fold_pextrb
+; We can't naively fold pextrw as it only writes to a 16-bit memory location
+; even though it can store to a 32-bit register.
+define i16 @stack_fold_pextrw(<8 x i16> %a0) {
+; CHECK-LABEL: stack_fold_pextrw
+; CHECK: pextrw $1, {{%xmm[0-9][0-9]*}}, %[[GPR32:(e[a-z]+|r[0-9]+d)]]
+; CHECK: movl %[[GPR32]], {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Spill
+; CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+entry:
+; add forces execution domain
+ %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+ %extract = extractelement <8 x i16> %add, i32 1
+ %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i16 %extract
+}
+
define i32 @stack_fold_pextrd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pextrd
;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
@@ -473,8 +488,6 @@ define i64 @stack_fold_pextrq(<2 x i64> %a0) {
ret i64 %1
}
-; TODO stack_fold_pextrw
-
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_phaddd
;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 3c99928824bc..8e253f11e93e 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,16 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; An integer truncation to i1 should be done with an and instruction to make
; sure only the LSBit survives. Test that this is the case both for a returned
; value and as the operand of a branch.
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
define zeroext i1 @test1(i32 %X) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: retl
%Y = trunc i32 %X to i1
ret i1 %Y
}
-; CHECK-LABEL: test1:
-; CHECK: andb $1, %al
define i1 @test2(i32 %val, i32 %mask) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: btl %ecx, %eax
+; CHECK-NEXT: jae .LBB1_2
+; CHECK-NEXT: # BB#1: # %ret_true
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB1_2: # %ret_false
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
entry:
%shifted = ashr i32 %val, %mask
%anded = and i32 %shifted, 1
@@ -21,10 +37,19 @@ ret_true:
ret_false:
ret i1 false
}
-; CHECK-LABEL: test2:
-; CHECK: btl
define i32 @test3(i8* %ptr) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: testb $1, (%eax)
+; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB2_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: retl
%val = load i8, i8* %ptr
%tmp = trunc i8 %val to i1
br i1 %tmp, label %cond_true, label %cond_false
@@ -33,10 +58,18 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test3:
-; CHECK: testb $1, (%eax)
define i32 @test4(i8* %ptr) nounwind {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB3_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB3_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: retl
%tmp = ptrtoint i8* %ptr to i1
br i1 %tmp, label %cond_true, label %cond_false
cond_true:
@@ -44,10 +77,29 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test4:
-; CHECK: testb $1, 4(%esp)
define i32 @test5(double %d) nounwind {
+; CHECK-LABEL: test5:
+; CHECK: # BB#0:
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fnstcw (%esp)
+; CHECK-NEXT: movzwl (%esp), %eax
+; CHECK-NEXT: movw $3199, (%esp) # imm = 0xC7F
+; CHECK-NEXT: fldcw (%esp)
+; CHECK-NEXT: movw %ax, (%esp)
+; CHECK-NEXT: fistps {{[0-9]+}}(%esp)
+; CHECK-NEXT: fldcw (%esp)
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB4_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB4_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: retl
%tmp = fptosi double %d to i1
br i1 %tmp, label %cond_true, label %cond_false
cond_true:
@@ -55,5 +107,3 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test5:
-; CHECK: testb $1
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 477150016486..6cfe41ac503d 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -1320,17 +1320,17 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1560,33 +1560,33 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index a42b3c96c3ae..7cb1c95cb01a 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1169,16 +1169,16 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1368,21 +1368,22 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm1, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
@@ -1838,21 +1839,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: js .LBB41_2
-; SSE-NEXT: # BB#1:
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: .LBB41_2:
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_3
-; SSE-NEXT: # BB#4:
+; SSE-NEXT: js .LBB41_1
+; SSE-NEXT: # BB#2:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: jmp .LBB41_5
-; SSE-NEXT: .LBB41_3:
+; SSE-NEXT: jmp .LBB41_3
+; SSE-NEXT: .LBB41_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1860,17 +1854,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
-; SSE-NEXT: .LBB41_5:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: .LBB41_3:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_6
-; SSE-NEXT: # BB#7:
+; SSE-NEXT: js .LBB41_4
+; SSE-NEXT: # BB#5:
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: jmp .LBB41_8
-; SSE-NEXT: .LBB41_6:
+; SSE-NEXT: jmp .LBB41_6
+; SSE-NEXT: .LBB41_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1878,9 +1871,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
-; SSE-NEXT: .LBB41_8:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: .LBB41_6:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: js .LBB41_8
+; SSE-NEXT: # BB#7:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: .LBB41_8:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2149,32 +2149,32 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB47_3
; SSE-NEXT: .LBB47_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB47_3:
-; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB47_6
; SSE-NEXT: .LBB47_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB47_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_7
; SSE-NEXT: # BB#8:
@@ -2208,9 +2208,9 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB47_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
@@ -3381,22 +3381,23 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: sitofp_load_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_4i64_to_4f32:
@@ -3546,41 +3547,42 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: sitofp_load_8i64_to_8f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movdqa 32(%rdi), %xmm3
-; SSE-NEXT: movdqa 48(%rdi), %xmm4
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movdqa 32(%rdi), %xmm2
+; SSE-NEXT: movdqa 48(%rdi), %xmm3
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: movq %xmm4, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_8i64_to_8f32:
@@ -3822,73 +3824,73 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: uitofp_load_4i64_to_4f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm3
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movdqa (%rdi), %xmm2
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB76_3
; SSE-NEXT: .LBB76_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB76_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB76_6
; SSE-NEXT: .LBB76_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB76_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB76_9
; SSE-NEXT: .LBB76_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB76_9:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB76_12
; SSE-NEXT: .LBB76_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB76_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
@@ -4186,121 +4188,121 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: uitofp_load_8i64_to_8f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm5
+; SSE-NEXT: movdqa (%rdi), %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
; SSE-NEXT: movdqa 32(%rdi), %xmm2
-; SSE-NEXT: movdqa 48(%rdi), %xmm3
-; SSE-NEXT: movq %xmm5, %rax
+; SSE-NEXT: movdqa 48(%rdi), %xmm1
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB80_3
; SSE-NEXT: .LBB80_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
-; SSE-NEXT: addss %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB80_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
; SSE-NEXT: jmp .LBB80_6
; SSE-NEXT: .LBB80_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: addss %xmm4, %xmm4
; SSE-NEXT: .LBB80_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB80_9
; SSE-NEXT: .LBB80_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
-; SSE-NEXT: addss %xmm6, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB80_9:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
+; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
; SSE-NEXT: jmp .LBB80_12
; SSE-NEXT: .LBB80_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
-; SSE-NEXT: addss %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: addss %xmm6, %xmm6
; SSE-NEXT: .LBB80_12:
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_13
; SSE-NEXT: # BB#14:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
; SSE-NEXT: jmp .LBB80_15
; SSE-NEXT: .LBB80_13:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
-; SSE-NEXT: addss %xmm7, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: addss %xmm5, %xmm5
; SSE-NEXT: .LBB80_15:
-; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_16
; SSE-NEXT: # BB#17:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
; SSE-NEXT: jmp .LBB80_18
; SSE-NEXT: .LBB80_16:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: addss %xmm7, %xmm7
; SSE-NEXT: .LBB80_18:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_19
; SSE-NEXT: # BB#20:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB80_21
; SSE-NEXT: .LBB80_19:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB80_21:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
@@ -4318,8 +4320,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB80_24:
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 6439a6dcb00b..918430efea1d 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -12,35 +12,35 @@ define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i1
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X86-NEXT: movdqa %xmm3, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test:
; X64: # BB#0:
-; X64-NEXT: movd %r8d, %xmm0
+; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: movd %edx, %xmm1
-; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: movd %r9d, %xmm0
; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X64-NEXT: movd %r9d, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movd %r8d, %xmm1
+; X64-NEXT: movd %ecx, %xmm2
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: movd %esi, %xmm3
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; X64-NEXT: movdqa %xmm3, (%rdi)
; X64-NEXT: retq
%tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0
diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll
index 4fa9596192a6..ce0b067f5043 100644
--- a/test/CodeGen/X86/vector-compare-results.ll
+++ b/test/CodeGen/X86/vector-compare-results.ll
@@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX1-LABEL: test_cmp_v64i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9
-; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
-; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vpextrb $15, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $14, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $12, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $10, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $8, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $6, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $4, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $2, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vzeroupper
@@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpextrb $15, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7
+; AVX2-NEXT: vpextrb $14, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vzeroupper
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index 340dd77ec481..3e3e93a7d5b0 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -11,9 +11,9 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
@@ -24,15 +24,15 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%m = srem <4 x i32> %t, %u
@@ -49,9 +49,9 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
@@ -62,15 +62,15 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%m = urem <4 x i32> %t, %u
@@ -88,9 +88,9 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
@@ -100,15 +100,15 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklps (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%m = frem <4 x float> %t, %u
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 53e471d6f175..392c0de95f24 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1333,19 +1333,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $61, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $61, %rcx
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: shlq $63, %rax
; SSE2-NEXT: sarq $63, %rax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i1_to_4i32:
@@ -1356,19 +1356,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $61, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $61, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: shlq $63, %rax
; SSSE3-NEXT: sarq $63, %rax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1523,14 +1523,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $2, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: shrl $2, %eax
+; SSE2-NEXT: shrl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSE2-NEXT: psllq $63, %xmm0
@@ -1549,14 +1549,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $2, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: shrl $2, %eax
+; SSSE3-NEXT: shrl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSSE3-NEXT: psllq $63, %xmm0
@@ -1813,7 +1813,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: shrq $7, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $60, %rcx
+; SSE2-NEXT: shlq $57, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1822,13 +1822,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $59, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $57, %rcx
+; SSE2-NEXT: shlq $60, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
@@ -1837,15 +1837,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $59, %rcx
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: shlq $63, %rax
; SSE2-NEXT: sarq $63, %rax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_8i1_to_8i16:
@@ -1855,7 +1855,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: shrq $7, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $60, %rcx
+; SSSE3-NEXT: shlq $57, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1864,13 +1864,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $59, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $57, %rcx
+; SSSE3-NEXT: shlq $60, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
@@ -1879,15 +1879,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $59, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: shlq $63, %rax
; SSSE3-NEXT: sarq $63, %rax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_8i1_to_8i16:
@@ -2191,7 +2191,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $6, %ecx
+; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
@@ -2203,30 +2203,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: shrl %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $5, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $4, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: shrl $6, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: shrl $7, %eax
; SSE2-NEXT: movzwl %ax, %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pslld $31, %xmm0
@@ -2240,7 +2240,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $6, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
@@ -2252,30 +2252,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: shrl %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movl %eax, %ecx
; SSSE3-NEXT: shrl $5, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $4, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: shrl $6, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: shrl $7, %eax
; SSSE3-NEXT: movzwl %ax, %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: pslld $31, %xmm0
@@ -2546,69 +2546,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; SSE2-NEXT: movq %rax, %rsi
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: movq %rax, %rbp
-; SSE2-NEXT: shlq $49, %rbp
-; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: shrq $15, %rbp
; SSE2-NEXT: movd %ebp, %xmm0
; SSE2-NEXT: movq %rax, %rbp
; SSE2-NEXT: movsbq %al, %rax
-; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: shlq $49, %r8
; SSE2-NEXT: sarq $63, %r8
; SSE2-NEXT: movd %r8d, %xmm1
-; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: shlq $50, %r9
; SSE2-NEXT: sarq $63, %r9
; SSE2-NEXT: movd %r9d, %xmm2
-; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: shlq $51, %r10
; SSE2-NEXT: sarq $63, %r10
; SSE2-NEXT: movd %r10d, %xmm3
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
; SSE2-NEXT: movd %r11d, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm5
+; SSE2-NEXT: movd %r14d, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
; SSE2-NEXT: movd %r15d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
-; SSE2-NEXT: movd %r13d, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: movd %r13d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: shlq $61, %rbx
; SSE2-NEXT: sarq $63, %rbx
; SSE2-NEXT: movd %ebx, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT: shlq $54, %rcx
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: shlq $62, %rdx
+; SSE2-NEXT: movd %ecx, %xmm5
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT: shlq $63, %rdx
; SSE2-NEXT: sarq $63, %rdx
-; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: shlq $52, %rsi
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE2-NEXT: shlq $58, %rsi
; SSE2-NEXT: sarq $63, %rsi
-; SSE2-NEXT: movd %esi, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: movd %esi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE2-NEXT: shlq $59, %rdi
; SSE2-NEXT: sarq $63, %rdi
; SSE2-NEXT: movd %edi, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE2-NEXT: shrq $15, %rbp
-; SSE2-NEXT: movd %ebp, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT: shlq $57, %rbp
+; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: movd %ebp, %xmm2
; SSE2-NEXT: shrq $7, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %r12
; SSE2-NEXT: popq %r13
@@ -2640,69 +2640,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; SSSE3-NEXT: movq %rax, %rsi
; SSSE3-NEXT: movq %rax, %rdi
; SSSE3-NEXT: movq %rax, %rbp
-; SSSE3-NEXT: shlq $49, %rbp
-; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: shrq $15, %rbp
; SSSE3-NEXT: movd %ebp, %xmm0
; SSSE3-NEXT: movq %rax, %rbp
; SSSE3-NEXT: movsbq %al, %rax
-; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: shlq $49, %r8
; SSSE3-NEXT: sarq $63, %r8
; SSSE3-NEXT: movd %r8d, %xmm1
-; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: shlq $50, %r9
; SSSE3-NEXT: sarq $63, %r9
; SSSE3-NEXT: movd %r9d, %xmm2
-; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: shlq $51, %r10
; SSSE3-NEXT: sarq $63, %r10
; SSSE3-NEXT: movd %r10d, %xmm3
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
; SSSE3-NEXT: movd %r11d, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm5
+; SSSE3-NEXT: movd %r14d, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
; SSSE3-NEXT: movd %r15d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
-; SSSE3-NEXT: movd %r13d, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: movd %r13d, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: shlq $61, %rbx
; SSSE3-NEXT: sarq $63, %rbx
; SSSE3-NEXT: movd %ebx, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT: shlq $54, %rcx
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: shlq $62, %rdx
+; SSSE3-NEXT: movd %ecx, %xmm5
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSSE3-NEXT: shlq $63, %rdx
; SSSE3-NEXT: sarq $63, %rdx
-; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: shlq $52, %rsi
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSSE3-NEXT: shlq $58, %rsi
; SSSE3-NEXT: sarq $63, %rsi
-; SSSE3-NEXT: movd %esi, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: movd %esi, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSSE3-NEXT: shlq $59, %rdi
; SSSE3-NEXT: sarq $63, %rdi
; SSSE3-NEXT: movd %edi, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSSE3-NEXT: shrq $15, %rbp
-; SSSE3-NEXT: movd %ebp, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: shlq $57, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm2
; SSSE3-NEXT: shrq $7, %rax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %r12
; SSSE3-NEXT: popq %r13
@@ -3002,7 +3002,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $14, %ecx
+; SSE2-NEXT: shrl $7, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
@@ -3011,21 +3011,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $10, %ecx
+; SSE2-NEXT: shrl $5, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $2, %ecx
+; SSE2-NEXT: shrl $4, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $12, %ecx
+; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: shrl $2, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
@@ -3033,18 +3033,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: shrl %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $13, %ecx
+; SSE2-NEXT: shrl $11, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $5, %ecx
+; SSE2-NEXT: shrl $10, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3053,31 +3053,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $8, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $11, %ecx
+; SSE2-NEXT: shrl $13, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: shrl $12, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $7, %ecx
+; SSE2-NEXT: shrl $14, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: shrl $15, %eax
; SSE2-NEXT: movzwl %ax, %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psllw $15, %xmm0
@@ -3091,7 +3091,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $14, %ecx
+; SSSE3-NEXT: shrl $7, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
@@ -3100,21 +3100,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $10, %ecx
+; SSSE3-NEXT: shrl $5, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $2, %ecx
+; SSSE3-NEXT: shrl $4, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $12, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: shrl $2, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
@@ -3122,18 +3122,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $8, %ecx
+; SSSE3-NEXT: shrl %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $13, %ecx
+; SSSE3-NEXT: shrl $11, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $5, %ecx
+; SSSE3-NEXT: shrl $10, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3142,31 +3142,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $8, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $11, %ecx
+; SSSE3-NEXT: shrl $13, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: shrl $12, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $7, %ecx
+; SSSE3-NEXT: shrl $14, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: shrl $15, %eax
; SSSE3-NEXT: movzwl %ax, %eax
; SSSE3-NEXT: movd %eax, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psllw $15, %xmm0
@@ -3556,162 +3556,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; SSE2-NEXT: pushq %r13
; SSE2-NEXT: pushq %r12
; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movswq (%rdi), %rbx
-; SSE2-NEXT: movq %rbx, %r10
-; SSE2-NEXT: movq %rbx, %r8
-; SSE2-NEXT: movq %rbx, %r9
-; SSE2-NEXT: movq %rbx, %r11
-; SSE2-NEXT: movq %rbx, %r14
-; SSE2-NEXT: movq %rbx, %r15
-; SSE2-NEXT: movq %rbx, %r12
-; SSE2-NEXT: movq %rbx, %r13
-; SSE2-NEXT: movq %rbx, %rdx
-; SSE2-NEXT: movq %rbx, %rsi
-; SSE2-NEXT: movq %rbx, %rcx
-; SSE2-NEXT: movq %rbx, %rbp
-; SSE2-NEXT: movq %rbx, %rax
-; SSE2-NEXT: shlq $49, %rax
-; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movq %rbx, %rax
-; SSE2-NEXT: shlq $57, %r10
+; SSE2-NEXT: movswq (%rdi), %rax
+; SSE2-NEXT: movq %rax, %r10
+; SSE2-NEXT: movq %rax, %r8
+; SSE2-NEXT: movq %rax, %r9
+; SSE2-NEXT: movq %rax, %r11
+; SSE2-NEXT: movq %rax, %r14
+; SSE2-NEXT: movq %rax, %r15
+; SSE2-NEXT: movq %rax, %r12
+; SSE2-NEXT: movq %rax, %r13
+; SSE2-NEXT: movq %rax, %rdx
+; SSE2-NEXT: movq %rax, %rsi
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: movq %rax, %rbp
+; SSE2-NEXT: movq %rax, %rbx
+; SSE2-NEXT: shrq $15, %rbx
+; SSE2-NEXT: movd %ebx, %xmm0
+; SSE2-NEXT: movq %rax, %rbx
+; SSE2-NEXT: shlq $49, %r10
; SSE2-NEXT: sarq $63, %r10
; SSE2-NEXT: movd %r10d, %xmm15
-; SSE2-NEXT: movq %rbx, %r10
-; SSE2-NEXT: movsbq %bl, %rbx
+; SSE2-NEXT: movq %rax, %r10
+; SSE2-NEXT: movsbq %al, %rax
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSE2-NEXT: shlq $53, %r8
+; SSE2-NEXT: shlq $50, %r8
; SSE2-NEXT: sarq $63, %r8
; SSE2-NEXT: movd %r8d, %xmm8
-; SSE2-NEXT: shlq $61, %r9
+; SSE2-NEXT: shlq $51, %r9
; SSE2-NEXT: sarq $63, %r9
-; SSE2-NEXT: movd %r9d, %xmm2
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: movd %r9d, %xmm3
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
; SSE2-NEXT: movd %r11d, %xmm9
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm5
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: movd %r14d, %xmm6
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
; SSE2-NEXT: movd %r15d, %xmm10
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm0
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm2
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
; SSE2-NEXT: movd %r13d, %xmm11
-; SSE2-NEXT: shlq $58, %rdx
+; SSE2-NEXT: shlq $61, %rdx
; SSE2-NEXT: sarq $63, %rdx
-; SSE2-NEXT: movd %edx, %xmm4
-; SSE2-NEXT: shlq $54, %rsi
+; SSE2-NEXT: movd %edx, %xmm5
+; SSE2-NEXT: shlq $62, %rsi
; SSE2-NEXT: sarq $63, %rsi
; SSE2-NEXT: movd %esi, %xmm12
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $63, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm6
-; SSE2-NEXT: shlq $52, %rbp
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: shlq $58, %rbp
; SSE2-NEXT: sarq $63, %rbp
; SSE2-NEXT: movd %ebp, %xmm13
-; SSE2-NEXT: shlq $60, %rax
-; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm7
-; SSE2-NEXT: shrq $15, %r10
-; SSE2-NEXT: movd %r10d, %xmm14
-; SSE2-NEXT: shrq $7, %rbx
-; SSE2-NEXT: movd %ebx, %xmm3
-; SSE2-NEXT: movswq 2(%rdi), %rdx
-; SSE2-NEXT: movq %rdx, %r8
-; SSE2-NEXT: movq %rdx, %r9
-; SSE2-NEXT: movq %rdx, %r10
-; SSE2-NEXT: movq %rdx, %r11
-; SSE2-NEXT: movq %rdx, %r14
-; SSE2-NEXT: movq %rdx, %r15
-; SSE2-NEXT: movq %rdx, %r12
-; SSE2-NEXT: movq %rdx, %r13
-; SSE2-NEXT: movq %rdx, %rbx
-; SSE2-NEXT: movq %rdx, %rax
-; SSE2-NEXT: movq %rdx, %rcx
-; SSE2-NEXT: movq %rdx, %rsi
-; SSE2-NEXT: movq %rdx, %rdi
-; SSE2-NEXT: movq %rdx, %rbp
-; SSE2-NEXT: shlq $49, %rbp
-; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: shlq $59, %rbx
+; SSE2-NEXT: sarq $63, %rbx
+; SSE2-NEXT: movd %ebx, %xmm7
+; SSE2-NEXT: shlq $57, %r10
+; SSE2-NEXT: sarq $63, %r10
+; SSE2-NEXT: movd %r10d, %xmm4
+; SSE2-NEXT: shrq $7, %rax
+; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: movswq 2(%rdi), %rsi
+; SSE2-NEXT: movq %rsi, %r8
+; SSE2-NEXT: movq %rsi, %r9
+; SSE2-NEXT: movq %rsi, %r10
+; SSE2-NEXT: movq %rsi, %r11
+; SSE2-NEXT: movq %rsi, %r14
+; SSE2-NEXT: movq %rsi, %r15
+; SSE2-NEXT: movq %rsi, %r12
+; SSE2-NEXT: movq %rsi, %r13
+; SSE2-NEXT: movq %rsi, %rbx
+; SSE2-NEXT: movq %rsi, %rax
+; SSE2-NEXT: movq %rsi, %rcx
+; SSE2-NEXT: movq %rsi, %rdx
+; SSE2-NEXT: movq %rsi, %rdi
+; SSE2-NEXT: movq %rsi, %rbp
+; SSE2-NEXT: shrq $15, %rbp
; SSE2-NEXT: movd %ebp, %xmm1
-; SSE2-NEXT: movq %rdx, %rbp
-; SSE2-NEXT: movsbq %dl, %rdx
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSE2-NEXT: movq %rsi, %rbp
+; SSE2-NEXT: movsbq %sil, %rsi
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSE2-NEXT: shlq $49, %r8
; SSE2-NEXT: sarq $63, %r8
-; SSE2-NEXT: movd %r8d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: movd %r8d, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSE2-NEXT: shlq $50, %r9
; SSE2-NEXT: sarq $63, %r9
-; SSE2-NEXT: movd %r9d, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: movd %r9d, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE2-NEXT: shlq $51, %r10
; SSE2-NEXT: sarq $63, %r10
-; SSE2-NEXT: movd %r10d, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: movd %r10d, %xmm5
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
-; SSE2-NEXT: movd %r11d, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: movd %r11d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm6
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: movd %r14d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
-; SSE2-NEXT: movd %r15d, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: movd %r15d, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
; SSE2-NEXT: movd %r13d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT: shlq $61, %rbx
; SSE2-NEXT: sarq $63, %rbx
-; SSE2-NEXT: movd %ebx, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE2-NEXT: shlq $54, %rax
+; SSE2-NEXT: movd %ebx, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: shlq $62, %rax
; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: movd %eax, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSE2-NEXT: shlq $63, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shlq $52, %rsi
-; SSE2-NEXT: sarq $63, %rsi
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: shlq $58, %rdx
+; SSE2-NEXT: sarq $63, %rdx
+; SSE2-NEXT: movd %edx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT: shlq $59, %rdi
; SSE2-NEXT: sarq $63, %rdi
-; SSE2-NEXT: movd %edi, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shrq $15, %rbp
+; SSE2-NEXT: movd %edi, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: shlq $57, %rbp
+; SSE2-NEXT: sarq $63, %rbp
; SSE2-NEXT: movd %ebp, %xmm2
-; SSE2-NEXT: shrq $7, %rdx
-; SSE2-NEXT: movd %edx, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE2-NEXT: shrq $7, %rsi
+; SSE2-NEXT: movd %esi, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %r12
; SSE2-NEXT: popq %r13
@@ -3728,162 +3728,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; SSSE3-NEXT: pushq %r13
; SSSE3-NEXT: pushq %r12
; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movswq (%rdi), %rbx
-; SSSE3-NEXT: movq %rbx, %r10
-; SSSE3-NEXT: movq %rbx, %r8
-; SSSE3-NEXT: movq %rbx, %r9
-; SSSE3-NEXT: movq %rbx, %r11
-; SSSE3-NEXT: movq %rbx, %r14
-; SSSE3-NEXT: movq %rbx, %r15
-; SSSE3-NEXT: movq %rbx, %r12
-; SSSE3-NEXT: movq %rbx, %r13
-; SSSE3-NEXT: movq %rbx, %rdx
-; SSSE3-NEXT: movq %rbx, %rsi
-; SSSE3-NEXT: movq %rbx, %rcx
-; SSSE3-NEXT: movq %rbx, %rbp
-; SSSE3-NEXT: movq %rbx, %rax
-; SSSE3-NEXT: shlq $49, %rax
-; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movq %rbx, %rax
-; SSSE3-NEXT: shlq $57, %r10
+; SSSE3-NEXT: movswq (%rdi), %rax
+; SSSE3-NEXT: movq %rax, %r10
+; SSSE3-NEXT: movq %rax, %r8
+; SSSE3-NEXT: movq %rax, %r9
+; SSSE3-NEXT: movq %rax, %r11
+; SSSE3-NEXT: movq %rax, %r14
+; SSSE3-NEXT: movq %rax, %r15
+; SSSE3-NEXT: movq %rax, %r12
+; SSSE3-NEXT: movq %rax, %r13
+; SSSE3-NEXT: movq %rax, %rdx
+; SSSE3-NEXT: movq %rax, %rsi
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: movq %rax, %rbp
+; SSSE3-NEXT: movq %rax, %rbx
+; SSSE3-NEXT: shrq $15, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm0
+; SSSE3-NEXT: movq %rax, %rbx
+; SSSE3-NEXT: shlq $49, %r10
; SSSE3-NEXT: sarq $63, %r10
; SSSE3-NEXT: movd %r10d, %xmm15
-; SSSE3-NEXT: movq %rbx, %r10
-; SSSE3-NEXT: movsbq %bl, %rbx
+; SSSE3-NEXT: movq %rax, %r10
+; SSSE3-NEXT: movsbq %al, %rax
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSSE3-NEXT: shlq $53, %r8
+; SSSE3-NEXT: shlq $50, %r8
; SSSE3-NEXT: sarq $63, %r8
; SSSE3-NEXT: movd %r8d, %xmm8
-; SSSE3-NEXT: shlq $61, %r9
+; SSSE3-NEXT: shlq $51, %r9
; SSSE3-NEXT: sarq $63, %r9
-; SSSE3-NEXT: movd %r9d, %xmm2
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: movd %r9d, %xmm3
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
; SSSE3-NEXT: movd %r11d, %xmm9
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm5
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: movd %r14d, %xmm6
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
; SSSE3-NEXT: movd %r15d, %xmm10
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm0
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm2
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
; SSSE3-NEXT: movd %r13d, %xmm11
-; SSSE3-NEXT: shlq $58, %rdx
+; SSSE3-NEXT: shlq $61, %rdx
; SSSE3-NEXT: sarq $63, %rdx
-; SSSE3-NEXT: movd %edx, %xmm4
-; SSSE3-NEXT: shlq $54, %rsi
+; SSSE3-NEXT: movd %edx, %xmm5
+; SSSE3-NEXT: shlq $62, %rsi
; SSSE3-NEXT: sarq $63, %rsi
; SSSE3-NEXT: movd %esi, %xmm12
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $63, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm6
-; SSSE3-NEXT: shlq $52, %rbp
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: shlq $58, %rbp
; SSSE3-NEXT: sarq $63, %rbp
; SSSE3-NEXT: movd %ebp, %xmm13
-; SSSE3-NEXT: shlq $60, %rax
-; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm7
-; SSSE3-NEXT: shrq $15, %r10
-; SSSE3-NEXT: movd %r10d, %xmm14
-; SSSE3-NEXT: shrq $7, %rbx
-; SSSE3-NEXT: movd %ebx, %xmm3
-; SSSE3-NEXT: movswq 2(%rdi), %rdx
-; SSSE3-NEXT: movq %rdx, %r8
-; SSSE3-NEXT: movq %rdx, %r9
-; SSSE3-NEXT: movq %rdx, %r10
-; SSSE3-NEXT: movq %rdx, %r11
-; SSSE3-NEXT: movq %rdx, %r14
-; SSSE3-NEXT: movq %rdx, %r15
-; SSSE3-NEXT: movq %rdx, %r12
-; SSSE3-NEXT: movq %rdx, %r13
-; SSSE3-NEXT: movq %rdx, %rbx
-; SSSE3-NEXT: movq %rdx, %rax
-; SSSE3-NEXT: movq %rdx, %rcx
-; SSSE3-NEXT: movq %rdx, %rsi
-; SSSE3-NEXT: movq %rdx, %rdi
-; SSSE3-NEXT: movq %rdx, %rbp
-; SSSE3-NEXT: shlq $49, %rbp
-; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: shlq $59, %rbx
+; SSSE3-NEXT: sarq $63, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm7
+; SSSE3-NEXT: shlq $57, %r10
+; SSSE3-NEXT: sarq $63, %r10
+; SSSE3-NEXT: movd %r10d, %xmm4
+; SSSE3-NEXT: shrq $7, %rax
+; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: movswq 2(%rdi), %rsi
+; SSSE3-NEXT: movq %rsi, %r8
+; SSSE3-NEXT: movq %rsi, %r9
+; SSSE3-NEXT: movq %rsi, %r10
+; SSSE3-NEXT: movq %rsi, %r11
+; SSSE3-NEXT: movq %rsi, %r14
+; SSSE3-NEXT: movq %rsi, %r15
+; SSSE3-NEXT: movq %rsi, %r12
+; SSSE3-NEXT: movq %rsi, %r13
+; SSSE3-NEXT: movq %rsi, %rbx
+; SSSE3-NEXT: movq %rsi, %rax
+; SSSE3-NEXT: movq %rsi, %rcx
+; SSSE3-NEXT: movq %rsi, %rdx
+; SSSE3-NEXT: movq %rsi, %rdi
+; SSSE3-NEXT: movq %rsi, %rbp
+; SSSE3-NEXT: shrq $15, %rbp
; SSSE3-NEXT: movd %ebp, %xmm1
-; SSSE3-NEXT: movq %rdx, %rbp
-; SSSE3-NEXT: movsbq %dl, %rdx
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSSE3-NEXT: movq %rsi, %rbp
+; SSSE3-NEXT: movsbq %sil, %rsi
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSSE3-NEXT: shlq $49, %r8
; SSSE3-NEXT: sarq $63, %r8
-; SSSE3-NEXT: movd %r8d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: movd %r8d, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSSE3-NEXT: shlq $50, %r9
; SSSE3-NEXT: sarq $63, %r9
-; SSSE3-NEXT: movd %r9d, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: movd %r9d, %xmm4
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSSE3-NEXT: shlq $51, %r10
; SSSE3-NEXT: sarq $63, %r10
-; SSSE3-NEXT: movd %r10d, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: movd %r10d, %xmm5
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
-; SSSE3-NEXT: movd %r11d, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: movd %r11d, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm6
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: movd %r14d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
-; SSSE3-NEXT: movd %r15d, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: movd %r15d, %xmm4
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
; SSSE3-NEXT: movd %r13d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSSE3-NEXT: shlq $61, %rbx
; SSSE3-NEXT: sarq $63, %rbx
-; SSSE3-NEXT: movd %ebx, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSSE3-NEXT: shlq $54, %rax
+; SSSE3-NEXT: movd %ebx, %xmm4
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: shlq $62, %rax
; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: movd %eax, %xmm6
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSSE3-NEXT: shlq $63, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shlq $52, %rsi
-; SSSE3-NEXT: sarq $63, %rsi
-; SSSE3-NEXT: movd %esi, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: shlq $58, %rdx
+; SSSE3-NEXT: sarq $63, %rdx
+; SSSE3-NEXT: movd %edx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSSE3-NEXT: shlq $59, %rdi
; SSSE3-NEXT: sarq $63, %rdi
-; SSSE3-NEXT: movd %edi, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shrq $15, %rbp
+; SSSE3-NEXT: movd %edi, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: shlq $57, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
; SSSE3-NEXT: movd %ebp, %xmm2
-; SSSE3-NEXT: shrq $7, %rdx
-; SSSE3-NEXT: movd %edx, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSSE3-NEXT: shrq $7, %rsi
+; SSSE3-NEXT: movd %esi, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %r12
; SSSE3-NEXT: popq %r13
diff --git a/test/CodeGen/X86/vector-shuffle-v48.ll b/test/CodeGen/X86/vector-shuffle-v48.ll
new file mode 100644
index 000000000000..9bd75148ecd1
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-v48.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx2 < %s | FileCheck %s
+define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovdqu (%rdi), %ymm4
+; CHECK-NEXT: vmovdqu 32(%rdi), %xmm5
+; CHECK-NEXT: vpextrb $13, %xmm5, %eax
+; CHECK-NEXT: vpextrb $10, %xmm5, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm5, %edx
+; CHECK-NEXT: vpextrb $4, %xmm5, %esi
+; CHECK-NEXT: vpextrb $1, %xmm5, %edi
+; CHECK-NEXT: vextracti128 $1, %ymm4, %xmm5
+; CHECK-NEXT: vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
+; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4
+; CHECK-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
+; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpinsrb $3, %edi, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $6, %ecx, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
+; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero
+; CHECK-NEXT: vpmulld %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpmulld %ymm1, %ymm5, %ymm1
+; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; CHECK-NEXT: vpshufb %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %1 = load <48 x i8>, <48 x i8>* %x0, align 1
+ %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+ %3 = zext <16 x i8> %2 to <16 x i32>
+ %4 = mul <16 x i32> %3, %x1
+ %5 = lshr <16 x i32> %4, %x2
+ %6 = trunc <16 x i32> %5 to <16 x i8>
+ ret <16 x i8> %6
+}
diff --git a/test/CodeGen/X86/vector-shuffle-variable-128.ll b/test/CodeGen/X86/vector-shuffle-variable-128.ll
index bde8a16d2a5a..452f387a4fee 100644
--- a/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -257,27 +257,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSE2-NEXT: andl $7, %eax
; SSE2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movzwl -24(%rsp,%rdx,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -301,27 +301,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSSE3-NEXT: andl $7, %eax
; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -425,67 +425,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm9
-; SSE2-NEXT: andl $15, %ecx
-; SSE2-NEXT: movzbl (%rcx,%r10), %eax
+; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: andl $15, %eax
+; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm10
-; SSE2-NEXT: andl $15, %r9d
-; SSE2-NEXT: movzbl (%r9,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm7
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm11
-; SSE2-NEXT: andl $15, %esi
-; SSE2-NEXT: movzbl (%rsi,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm6
+; SSE2-NEXT: movd %eax, %xmm7
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm12
+; SSE2-NEXT: movd %eax, %xmm11
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: movd %eax, %xmm6
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm13
-; SSE2-NEXT: andl $15, %edx
-; SSE2-NEXT: movzbl (%rdx,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movd %eax, %xmm12
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: andl $15, %r9d
+; SSE2-NEXT: movzbl (%r9,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm13
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movzbl (%r8,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: andl $15, %ecx
+; SSE2-NEXT: movzbl (%rcx,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: andl $15, %edx
+; SSE2-NEXT: movzbl (%rdx,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT: andl $15, %eax
-; SSE2-NEXT: movzbl (%rax,%r10), %eax
+; SSE2-NEXT: andl $15, %esi
+; SSE2-NEXT: movzbl (%rsi,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movzbl (%rdi,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -510,67 +510,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm9
-; SSSE3-NEXT: andl $15, %ecx
-; SSSE3-NEXT: movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: andl $15, %eax
+; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm10
-; SSSE3-NEXT: andl $15, %r9d
-; SSSE3-NEXT: movzbl (%r9,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm7
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm11
-; SSSE3-NEXT: andl $15, %esi
-; SSSE3-NEXT: movzbl (%rsi,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm6
+; SSSE3-NEXT: movd %eax, %xmm7
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm12
+; SSSE3-NEXT: movd %eax, %xmm11
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm5
+; SSSE3-NEXT: movd %eax, %xmm6
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm13
-; SSSE3-NEXT: andl $15, %edx
-; SSSE3-NEXT: movzbl (%rdx,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: movd %eax, %xmm12
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: movd %eax, %xmm5
+; SSSE3-NEXT: andl $15, %r9d
+; SSSE3-NEXT: movzbl (%r9,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm13
; SSSE3-NEXT: andl $15, %r8d
; SSSE3-NEXT: movzbl (%r8,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: andl $15, %ecx
+; SSSE3-NEXT: movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: andl $15, %edx
+; SSSE3-NEXT: movzbl (%rdx,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; SSSE3-NEXT: andl $15, %eax
-; SSSE3-NEXT: movzbl (%rax,%r10), %eax
+; SSSE3-NEXT: andl $15, %esi
+; SSSE3-NEXT: movzbl (%rsi,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: andl $15, %edi
; SSSE3-NEXT: movzbl (%rdi,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -824,23 +824,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm8
-; SSE2-NEXT: movzbl 7(%rdi), %edx
+; SSE2-NEXT: movzbl 14(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm15
-; SSE2-NEXT: movzbl 11(%rdi), %edx
+; SSE2-NEXT: movzbl 13(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm9
-; SSE2-NEXT: movzbl 3(%rdi), %edx
+; SSE2-NEXT: movzbl 12(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: movzbl 13(%rdi), %edx
+; SSE2-NEXT: movzbl 11(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm10
-; SSE2-NEXT: movzbl 5(%rdi), %edx
+; SSE2-NEXT: movzbl 10(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm7
@@ -848,11 +848,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm11
-; SSE2-NEXT: movzbl 1(%rdi), %edx
+; SSE2-NEXT: movzbl 8(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm6
-; SSE2-NEXT: movzbl 14(%rdi), %edx
+; SSE2-NEXT: movzbl 7(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm12
@@ -860,23 +860,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm5
-; SSE2-NEXT: movzbl 10(%rdi), %edx
+; SSE2-NEXT: movzbl 5(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm13
-; SSE2-NEXT: movzbl 2(%rdi), %edx
+; SSE2-NEXT: movzbl 4(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm4
-; SSE2-NEXT: movzbl 12(%rdi), %edx
+; SSE2-NEXT: movzbl 3(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm14
-; SSE2-NEXT: movzbl 4(%rdi), %edx
+; SSE2-NEXT: movzbl 2(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: movzbl 8(%rdi), %edx
+; SSE2-NEXT: movzbl 1(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm2
@@ -885,19 +885,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -909,23 +909,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm8
-; SSSE3-NEXT: movzbl 7(%rdi), %edx
+; SSSE3-NEXT: movzbl 14(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm15
-; SSSE3-NEXT: movzbl 11(%rdi), %edx
+; SSSE3-NEXT: movzbl 13(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm9
-; SSSE3-NEXT: movzbl 3(%rdi), %edx
+; SSSE3-NEXT: movzbl 12(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: movzbl 13(%rdi), %edx
+; SSSE3-NEXT: movzbl 11(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm10
-; SSSE3-NEXT: movzbl 5(%rdi), %edx
+; SSSE3-NEXT: movzbl 10(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm7
@@ -933,11 +933,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm11
-; SSSE3-NEXT: movzbl 1(%rdi), %edx
+; SSSE3-NEXT: movzbl 8(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm6
-; SSSE3-NEXT: movzbl 14(%rdi), %edx
+; SSSE3-NEXT: movzbl 7(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm12
@@ -945,23 +945,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm5
-; SSSE3-NEXT: movzbl 10(%rdi), %edx
+; SSSE3-NEXT: movzbl 5(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm13
-; SSSE3-NEXT: movzbl 2(%rdi), %edx
+; SSSE3-NEXT: movzbl 4(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm4
-; SSSE3-NEXT: movzbl 12(%rdi), %edx
+; SSSE3-NEXT: movzbl 3(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm14
-; SSSE3-NEXT: movzbl 4(%rdi), %edx
+; SSSE3-NEXT: movzbl 2(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm1
-; SSSE3-NEXT: movzbl 8(%rdi), %edx
+; SSSE3-NEXT: movzbl 1(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm2
@@ -970,19 +970,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -1225,28 +1225,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
; SSE2-NEXT: andl $7, %ecx
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $7, %r8d
-; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $7, %r9d
; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT: movzwl -40(%rsp,%rdx,2), %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movzwl -40(%rsp,%rdi,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
@@ -1263,28 +1262,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
; SSSE3-NEXT: andl $7, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $7, %r8d
-; SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $7, %r9d
; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSSE3-NEXT: movzwl -40(%rsp,%rdx,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: movzwl -40(%rsp,%rdi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
diff --git a/test/CodeGen/X86/vector-sqrt.ll b/test/CodeGen/X86/vector-sqrt.ll
index 13088b7fa5f2..c5ac4466b5fa 100644
--- a/test/CodeGen/X86/vector-sqrt.ll
+++ b/test/CodeGen/X86/vector-sqrt.ll
@@ -5,10 +5,8 @@
define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtd2:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtsd 8(%rdi), %xmm1, %xmm1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
@@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1
define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtf4:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtss 4(%rdi), %xmm1, %xmm1
+; CHECK-NEXT: vsqrtss 8(%rdi), %xmm2, %xmm2
+; CHECK-NEXT: vsqrtss 12(%rdi), %xmm3, %xmm3
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll
index fc246669992c..3e4b9aedf2b8 100644
--- a/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -13,7 +13,7 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -30,9 +30,6 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -46,7 +43,7 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -63,9 +60,6 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -79,7 +73,7 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -98,9 +92,6 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -116,7 +107,7 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -135,9 +126,6 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -153,31 +141,15 @@ define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: ugt_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: ugt_v4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: ugt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ugt <4 x i32> %sh1, %sh2
@@ -189,32 +161,16 @@ define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: ult_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: ult_v4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: ult_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ult <4 x i32> %sh1, %sh2
@@ -226,12 +182,9 @@ define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v4i32:
@@ -260,9 +213,6 @@ define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
@@ -294,9 +244,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -304,9 +251,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -320,20 +264,14 @@ define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pcmpgtw %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ult_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -408,22 +346,20 @@ define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ugt_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -436,11 +372,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pand %xmm1, %xmm2
; SSE-NEXT: pcmpgtb %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
@@ -448,11 +383,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; AVX-LABEL: ult_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
index 0eb17fb6c14d..c1d242575253 100644
--- a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
+++ b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
@@ -15,5 +15,5 @@ body:
; PRE-RA-NEXT: - { reg: '%esi', virtual-reg: '%1' }
; POST-RA: liveins:
-; POST-RA-NEXT: - { reg: '%edi' }
-; POST-RA-NEXT: - { reg: '%esi' }
+; POST-RA-NEXT: - { reg: '%edi', virtual-reg: '' }
+; POST-RA-NEXT: - { reg: '%esi', virtual-reg: '' }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index c9a34de12369..a31adc337906 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psllq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 88cba8a4d6ac..a381637b40a9 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psrlq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll
index 4181a374c61c..74214aa1b8b7 100644
--- a/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/test/CodeGen/X86/x86-interleaved-access.ll
@@ -135,3 +135,96 @@ define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
%add3 = add <4 x i64> %add2, %strided.v3
ret <4 x i64> %add3
}
+
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; AVX-LABEL: store_factorf64_4:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX-NEXT: vmovupd %ymm0, 96(%rdi)
+; AVX-NEXT: vmovupd %ymm6, 64(%rdi)
+; AVX-NEXT: vmovupd %ymm5, 32(%rdi)
+; AVX-NEXT: vmovupd %ymm4, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+ ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; AVX1-LABEL: store_factori64_4:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovupd %ymm6, 64(%rdi)
+; AVX1-NEXT: vmovupd %ymm5, 32(%rdi)
+; AVX1-NEXT: vmovupd %ymm4, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: store_factori64_4:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5
+; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm5 = xmm5[0,1],xmm6[2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm5 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm6
+; AVX2-NEXT: vpermq {{.*#+}} ymm7 = ymm0[3,1,2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm6
+; AVX2-NEXT: vpbroadcastq %xmm3, %ymm7
+; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7]
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1,2,3],ymm6[4,5,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,1,1,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovdqu %ymm6, (%rdi)
+; AVX2-NEXT: vmovdqu %ymm5, 96(%rdi)
+; AVX2-NEXT: vmovdqu %ymm4, 64(%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+ ret void
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s
new file mode 100644
index 000000000000..8a9c03b77c0d
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s
@@ -0,0 +1,277 @@
+# Test object to verify that dwarfdump handles dwp files with DWARF v5 string
+# offset tables. We have 2 CUs and 2 TUs, where it is assumed that
+# CU1 and TU1 came from one object file, CU2 and TU2 from a second object
+# file.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-dwp.s -filetype=obj \
+# -o dwarfdump-str_offsets-dwp.x86_64.o
+
+ .section .debug_str.dwo,"MS",@progbits,1
+str_producer:
+ .asciz "Handmade DWARF producer"
+str_CU1:
+ .asciz "Compile_Unit_1"
+str_CU1_dir:
+ .asciz "/home/test/CU1"
+str_CU2:
+ .asciz "Compile_Unit_2"
+str_CU2_dir:
+ .asciz "/home/test/CU2"
+str_TU1:
+ .asciz "Type_Unit_1"
+str_TU1_type:
+ .asciz "MyStruct_1"
+str_TU2:
+ .asciz "Type_Unit_2"
+str_TU2_type:
+ .asciz "MyStruct_2"
+
+ .section .debug_str_offsets.dwo,"",@progbits
+# Object file 1's portion of the .debug_str_offsets.dwo section.
+.debug_str_offsets_object_file1:
+
+# CU1's contribution (from object file 1)
+.debug_str_offsets_start_CU1:
+ .long .debug_str_offsets_end_CU1-.debug_str_offsets_base_CU1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base_CU1:
+ .long str_producer-.debug_str.dwo
+ .long str_CU1-.debug_str.dwo
+ .long str_CU1_dir-.debug_str.dwo
+.debug_str_offsets_end_CU1:
+
+# TU1's contribution (from object file 1)
+.debug_str_offsets_start_TU1:
+ .long .debug_str_offsets_end_TU1-.debug_str_offsets_base_TU1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base_TU1:
+ .long str_TU1-.debug_str.dwo
+ .long str_TU1_type-.debug_str.dwo
+.debug_str_offsets_end_TU1:
+
+# Object file 2's portion of the .debug_str_offsets.dwo section.
+.debug_str_offsets_object_file2:
+
+# CU2's contribution (from object file 2)
+.debug_str_offsets_start_CU2:
+ .long .debug_str_offsets_end_CU2-.debug_str_offsets_base_CU2
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base_CU2:
+ .long str_producer-.debug_str.dwo
+ .long str_CU2-.debug_str.dwo
+ .long str_CU2_dir-.debug_str.dwo
+.debug_str_offsets_end_CU2:
+
+# TU2's contribution (from object file 2)
+.debug_str_offsets_start_TU2:
+ .long .debug_str_offsets_end_TU2-.debug_str_offsets_base_TU2
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base_TU2:
+ .long str_TU2-.debug_str.dwo
+ .long str_TU2_type-.debug_str.dwo
+.debug_str_offsets_end_TU2:
+
+
+# Abbrevs are shared for all compile and type units
+ .section .debug_abbrev.dwo,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x25 # DW_AT_producer
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x03 # DW_AT_name (second time). NOTE(review): the CU DIEs label this slot "comp dir"; DW_AT_comp_dir would be 0x1b - confirm
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x02 # Abbrev code
+ .byte 0x41 # DW_TAG_type_unit
+ .byte 0x01 # DW_CHILDREN_yes
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x03 # Abbrev code
+ .byte 0x13 # DW_TAG_structure_type
+ .byte 0x00 # DW_CHILDREN_no (no members)
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+abbrev_end:
+
+ .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 CU header.
+CU1_5_start:
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+# The DW_AT_str_offsets_base attribute for CU1 contains the offset of CU1's
+# contribution relative to the start of object file 1's portion of the
+# .debug_str_offsets section.
+ .long .debug_str_offsets_base_CU1-.debug_str_offsets_object_file1
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU1_5_end:
+
+CU2_5_start:
+ .long CU2_5_end-CU2_5_version # Length of Unit
+CU2_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+# The DW_AT_str_offsets_base attribute for CU2 contains the offset of CU2's
+# contribution relative to the start of object file 2's portion of the
+# .debug_str_offsets section.
+ .long .debug_str_offsets_base_CU2-.debug_str_offsets_object_file2
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU2_5_end:
+
+ .section .debug_types.dwo,"",@progbits
+# DWARF v5 Type unit header.
+TU1_5_start:
+ .long TU1_5_end-TU1_5_version # Length of Unit
+TU1_5_version:
+ .short 5 # DWARF version number
+ .byte 2 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+ .quad 0x0011223344556677 # Type Signature
+ .long TU1_5_type-TU1_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # Index of the unit type name string
+# The DW_AT_str_offsets_base attribute for TU1 contains the offset of TU1's
+# contribution relative to the start of object file 1's portion of the
+# .debug_str_offsets section.
+ .long .debug_str_offsets_base_TU1-.debug_str_offsets_object_file1
+# The type DIE, which has a name.
+TU1_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # Index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU1_5_end:
+
+TU2_5_start:
+ .long TU2_5_end-TU2_5_version # Length of Unit
+TU2_5_version:
+ .short 5 # DWARF version number
+ .byte 2 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+ .quad 0x00aabbccddeeff99 # Type Signature
+ .long TU2_5_type-TU2_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # Index of the unit type name string
+# The DW_AT_str_offsets_base attribute for TU2 contains the offset of TU2's
+# contribution relative to the start of object file 2's portion of the
+# .debug_str_offsets section.
+ .long .debug_str_offsets_base_TU2-.debug_str_offsets_object_file2
+# The type DIE, which has a name.
+TU2_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # Index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU2_5_end:
+
+ .section .debug_cu_index,"",@progbits
+ # The index header
+ .long 2 # Version
+ .long 3 # Columns of contribution matrix
+ .long 2 # number of units
+ .long 2 # number of hash buckets in table
+
+ # The signatures for both CUs.
+ .quad 0xddeeaaddbbaabbee # signature 1
+ .quad 0xff00ffeeffaaff00 # signature 2
+ # The indexes for both CUs.
+ .long 1 # index 1
+ .long 2 # index 2
+ # The sections to which both CUs contribute.
+ .long 1 # DW_SECT_INFO
+ .long 3 # DW_SECT_ABBREV
+ .long 6 # DW_SECT_STR_OFFSETS
+
+ # The starting offsets of both CUs' contributions to info,
+ # abbrev and string offsets table.
+ .long CU1_5_start-.debug_info.dwo
+ .long 0
+ .long .debug_str_offsets_object_file1-.debug_str_offsets.dwo
+ .long CU2_5_start-.debug_info.dwo
+ .long 0
+ .long .debug_str_offsets_object_file2-.debug_str_offsets.dwo
+
+ # The lengths of both CUs' contributions to info, abbrev and
+ # string offsets table.
+ .long CU1_5_end-CU1_5_start
+ .long abbrev_end-.debug_abbrev.dwo
+ .long .debug_str_offsets_end_CU1-.debug_str_offsets_start_CU1
+ .long CU2_5_end-CU2_5_start
+ .long abbrev_end-.debug_abbrev.dwo
+ .long .debug_str_offsets_end_CU2-.debug_str_offsets_start_CU2
+
+ .section .debug_tu_index,"",@progbits
+ # The index header
+ .long 2 # Version
+ .long 3 # Columns of contribution matrix
+ .long 2 # number of units
+ .long 2 # number of hash buckets in table
+
+ # The signatures for both TUs.
+ .quad 0xeeaaddbbaabbeedd # signature 1
+ .quad 0x00ffeeffaaff00ff # signature 2
+ # The indexes for both TUs.
+ .long 1 # index 1
+ .long 2 # index 2
+ # The sections to which both TUs contribute.
+ .long 2 # DW_SECT_TYPES
+ .long 3 # DW_SECT_ABBREV
+ .long 6 # DW_SECT_STR_OFFSETS
+
+ # The starting offsets of both TUs' contributions to types,
+ # abbrev and string offsets table.
+ .long TU1_5_start-.debug_types.dwo
+ .long 0
+ .long .debug_str_offsets_object_file1-.debug_str_offsets.dwo
+ .long TU2_5_start-.debug_types.dwo
+ .long 0
+ .long .debug_str_offsets_object_file2-.debug_str_offsets.dwo
+
+ # The lengths of both TUs' contributions to types, abbrev and
+ # string offsets table.
+ .long TU1_5_end-TU1_5_start
+ .long abbrev_end-.debug_abbrev.dwo
+ .long .debug_str_offsets_end_TU1-.debug_str_offsets_start_TU1
+ .long TU2_5_end-TU2_5_start
+ .long abbrev_end-.debug_abbrev.dwo
+ .long .debug_str_offsets_end_TU2-.debug_str_offsets_start_TU2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s
new file mode 100644
index 000000000000..361448af0e87
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s
@@ -0,0 +1,34 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-1.s -filetype=obj \
+# -o dwarfdump-str-offsets-invalid-1.x86_64.o
+#
+# A rudimentary abbrev section.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+ .byte 1 # Abbreviation code
+CU1_5_end:
+
+ .section .debug_str_offsets,"",@progbits
+# A degenerate section, not enough for a single contribution size.
+ .byte 2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o
new file mode 100644
index 000000000000..65aae84c22f4
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s
new file mode 100644
index 000000000000..2f0fdfce2438
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s
@@ -0,0 +1,36 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-2.s -filetype=obj \
+# -o dwarfdump-str-offsets-invalid-2.x86_64.o
+
+# A rudimentary abbrev section.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+ .byte 1 # Abbreviation code
+CU1_5_end:
+
+ .section .debug_str_offsets,"",@progbits
+# A degenerate section with fewer bytes than required for a DWARF64 size.
+ .long 0xffffffff
+ .long 0
+ .short 4
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o
new file mode 100644
index 000000000000..90d2074f19ff
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s
new file mode 100644
index 000000000000..b4355fe27f75
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s
@@ -0,0 +1,88 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-3.s -filetype=obj \
+# -o dwarfdump-str-offsets-invalid-3.x86_64.o
+
+ .section .debug_str,"MS",@progbits,1
+str_producer:
+ .asciz "Handmade DWARF producer"
+str_CU1:
+ .asciz "Compile_Unit_1"
+str_CU1_dir:
+ .asciz "/home/test/CU1"
+str_CU2:
+ .asciz "Compile_Unit_2"
+str_CU2_dir:
+ .asciz "/home/test/CU2"
+str_TU:
+ .asciz "Type_Unit"
+str_TU_type:
+ .asciz "MyStruct"
+
+ .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+ .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+ .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+ .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+ .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+ .asciz "V5_split_Mystruct"
+
+# A rudimentary abbrev section.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+ .byte 1 # Abbreviation code
+CU1_5_end:
+
+ .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+# Invalid length
+ .long 0xfffffffe
+ .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base0:
+ .long str_producer
+ .long str_CU1
+ .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+ .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base1:
+ .long str_producer
+ .long str_CU2
+ .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+ .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base2:
+ .long str_TU
+ .long str_TU_type
+.debug_str_offsets_segment2_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o
new file mode 100644
index 000000000000..68f8c5f3be02
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s
new file mode 100644
index 000000000000..8ec288151eca
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s
@@ -0,0 +1,50 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-4.s -filetype=obj \
+# -o dwarfdump-str-offsets-invalid-4.x86_64.o
+
+ .section .debug_str,"MS",@progbits,1
+str_producer:
+ .asciz "Handmade DWARF producer"
+str_CU1:
+ .asciz "Compile_Unit_1"
+
+# A rudimentary abbrev section.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+ .byte 1 # Abbreviation code
+CU1_5_end:
+
+# Every unit contributes to the string_offsets table.
+ .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+# The length is not a multiple of 4. Check that we don't read off the
+# end.
+ .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base0:
+ .long str_producer
+ .long str_CU1
+ .byte 0
+.debug_str_offsets_segment0_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o
new file mode 100644
index 000000000000..8a17b0e6a519
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s
new file mode 100644
index 000000000000..e185e407b630
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s
@@ -0,0 +1,10 @@
+# Test object to verify that llvm-dwarfdump handles a degenerate string offsets
+# section.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-5.s -filetype=obj \
+# -o dwarfdump-str-offsets-invalid-5.x86_64.o
+# Every unit contributes to the string_offsets table.
+ .section .debug_str_offsets,"",@progbits
+# A degenerate section, not enough for a single entry.
+ .byte 2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o
new file mode 100644
index 000000000000..6cfce83e5655
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets.s
new file mode 100644
index 000000000000..e0a634c7c4a2
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets.s
@@ -0,0 +1,500 @@
+# Test object to verify dwarfdump handles v5 string offset tables.
+# We have 2 v5 CUs, a v5 TU, and a split v5 CU and TU.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets.s -filetype=obj \
+# -o dwarfdump-str-offsets.x86_64.o
+
+ .section .debug_str,"MS",@progbits,1
+str_producer:
+ .asciz "Handmade DWARF producer"
+str_CU1:
+ .asciz "Compile_Unit_1"
+str_CU1_dir:
+ .asciz "/home/test/CU1"
+str_CU2:
+ .asciz "Compile_Unit_2"
+str_CU2_dir:
+ .asciz "/home/test/CU2"
+str_TU:
+ .asciz "Type_Unit"
+str_TU_type:
+ .asciz "MyStruct"
+
+# Every unit contributes to the string_offsets table.
+ .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+ .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base0:
+ .long str_producer
+ .long str_CU1
+ .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+ .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base1:
+ .long str_producer
+ .long str_CU2
+ .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+ .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base2:
+ .long str_TU
+ .long str_TU_type
+.debug_str_offsets_segment2_end:
+
+ .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+ .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+ .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+ .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+ .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+ .asciz "V5_split_Mystruct"
+
+ .section .debug_str_offsets.dwo,"",@progbits
+# The split CU's contribution
+ .long .debug_dwo_str_offsets_segment0_end-.debug_dwo_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_dwo_str_offsets_base0:
+ .long dwo_str_CU_5_producer-.debug_str.dwo
+ .long dwo_str_CU_5_name-.debug_str.dwo
+ .long dwo_str_CU_5_comp_dir-.debug_str.dwo
+.debug_dwo_str_offsets_segment0_end:
+# The split TU's contribution
+ .long .debug_dwo_str_offsets_segment1_end-.debug_dwo_str_offsets_base1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_dwo_str_offsets_base1:
+ .long dwo_str_TU_5-.debug_str.dwo
+ .long dwo_str_TU_5_type-.debug_str.dwo
+.debug_dwo_str_offsets_segment1_end:
+
+# All CUs/TUs use the same abbrev section for simplicity.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x25 # DW_AT_producer
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x1b # DW_AT_comp_dir
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x02 # Abbrev code
+ .byte 0x41 # DW_TAG_type_unit
+ .byte 0x01 # DW_CHILDREN_yes
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x03 # Abbrev code
+ .byte 0x13 # DW_TAG_structure_type
+ .byte 0x00 # DW_CHILDREN_no (no members)
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# And a .dwo copy for the .dwo sections.
+ .section .debug_abbrev.dwo,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x25 # DW_AT_producer
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x1b # DW_AT_comp_dir
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x02 # Abbrev code
+ .byte 0x41 # DW_TAG_type_unit
+ .byte 0x01 # DW_CHILDREN_yes
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x03 # Abbrev code
+ .byte 0x13 # DW_TAG_structure_type
+ .byte 0x00 # DW_CHILDREN_no (no members)
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_str_offsets_base0
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU1_5_end:
+
+# DWARF v5 CU header
+ .long CU2_5_end-CU2_5_version # Length of Unit
+CU2_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_str_offsets_base1
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU2_5_end:
+
+ .section .debug_types,"",@progbits
+# DWARF v5 Type unit header.
+TU_5_start:
+ .long TU_5_end-TU_5_version # Length of Unit
+TU_5_version:
+ .short 5 # DWARF version number
+ .byte 2 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad 0x0011223344556677 # Type Signature
+ .long TU_5_type-TU_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # Index of the unit type name string
+ .long .debug_str_offsets_base2 # offset into the str_offsets section
+# The type DIE, which has a name.
+TU_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # Index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU_5_end:
+
+ .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 split CU header.
+ .long CU_split_5_end-CU_split_5_version # Length of Unit
+CU_split_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_dwo_str_offsets_base0-.debug_str_offsets.dwo
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU_split_5_end:
+
+ .section .debug_types.dwo,"",@progbits
+
+# DWARF v5 split type unit header.
+TU_split_5_start:
+ .long TU_split_5_end-TU_split_5_version # Length of Unit
+TU_split_5_version:
+ .short 5 # DWARF version number
+ .byte 6 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev Section
+ .quad 0x8899aabbccddeeff # Type Signature
+ .long TU_split_5_type-TU_split_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # The index of the type unit name string
+ .long .debug_dwo_str_offsets_base1-.debug_str_offsets.dwo
+# The type DIE, which has a name.
+TU_split_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # The index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU_split_5_end:
+# Test object to verify dwarfdump handles v5 string offset tables.
+# We have 2 v5 CUs, a v5 TU, and a split v5 CU and TU.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets.s -filetype=obj \
+# -o dwarfdump-str-offsets.x86_64.o
+
+ .section .debug_str,"MS",@progbits,1
+str_producer:
+ .asciz "Handmade DWARF producer"
+str_CU1:
+ .asciz "Compile_Unit_1"
+str_CU1_dir:
+ .asciz "/home/test/CU1"
+str_CU2:
+ .asciz "Compile_Unit_2"
+str_CU2_dir:
+ .asciz "/home/test/CU2"
+str_TU:
+ .asciz "Type_Unit"
+str_TU_type:
+ .asciz "MyStruct"
+
+# Every unit contributes to the string_offsets table.
+ .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+ .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base0:
+ .long str_producer
+ .long str_CU1
+ .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+ .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base1:
+ .long str_producer
+ .long str_CU2
+ .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+ .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_str_offsets_base2:
+ .long str_TU
+ .long str_TU_type
+.debug_str_offsets_segment2_end:
+
+ .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+ .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+ .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+ .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+ .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+ .asciz "V5_split_Mystruct"
+
+ .section .debug_str_offsets.dwo,"",@progbits
+# The split CU's contribution
+ .long .debug_dwo_str_offsets_segment0_end-.debug_dwo_str_offsets_base0
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_dwo_str_offsets_base0:
+ .long dwo_str_CU_5_producer-.debug_str.dwo
+ .long dwo_str_CU_5_name-.debug_str.dwo
+ .long dwo_str_CU_5_comp_dir-.debug_str.dwo
+.debug_dwo_str_offsets_segment0_end:
+# The split TU's contribution
+ .long .debug_dwo_str_offsets_segment1_end-.debug_dwo_str_offsets_base1
+ .short 5 # DWARF version
+ .short 0 # Padding
+.debug_dwo_str_offsets_base1:
+ .long dwo_str_TU_5-.debug_str.dwo
+ .long dwo_str_TU_5_type-.debug_str.dwo
+.debug_dwo_str_offsets_segment1_end:
+
+# All CUs/TUs use the same abbrev section for simplicity.
+ .section .debug_abbrev,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x25 # DW_AT_producer
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x1b # DW_AT_comp_dir
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x02 # Abbrev code
+ .byte 0x41 # DW_TAG_type_unit
+ .byte 0x01 # DW_CHILDREN_yes
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x03 # Abbrev code
+ .byte 0x13 # DW_TAG_structure_type
+ .byte 0x00 # DW_CHILDREN_no (no members)
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+# And a .dwo copy for the .dwo sections.
+ .section .debug_abbrev.dwo,"",@progbits
+ .byte 0x01 # Abbrev code
+ .byte 0x11 # DW_TAG_compile_unit
+ .byte 0x00 # DW_CHILDREN_no
+ .byte 0x25 # DW_AT_producer
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x1b # DW_AT_comp_dir
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x02 # Abbrev code
+ .byte 0x41 # DW_TAG_type_unit
+ .byte 0x01 # DW_CHILDREN_yes
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x72 # DW_AT_str_offsets_base
+ .byte 0x17 # DW_FORM_sec_offset
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x03 # Abbrev code
+ .byte 0x13 # DW_TAG_structure_type
+ .byte 0x00 # DW_CHILDREN_no (no members)
+ .byte 0x03 # DW_AT_name
+ .byte 0x1a # DW_FORM_strx
+ .byte 0x00 # EOM(1)
+ .byte 0x00 # EOM(2)
+ .byte 0x00 # EOM(3)
+
+ .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+ .long CU1_5_end-CU1_5_version # Length of Unit
+CU1_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_str_offsets_base0
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU1_5_end:
+
+# DWARF v5 CU header
+ .long CU2_5_end-CU2_5_version # Length of Unit
+CU2_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_str_offsets_base1
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU2_5_end:
+
+ .section .debug_types,"",@progbits
+# DWARF v5 Type unit header.
+TU_5_start:
+ .long TU_5_end-TU_5_version # Length of Unit
+TU_5_version:
+ .short 5 # DWARF version number
+ .byte 2 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad 0x0011223344556677 # Type Signature
+ .long TU_5_type-TU_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # Index of the type unit name string
+ .long .debug_str_offsets_base2 # offset into the str_offsets section
+# The type DIE, which has a name.
+TU_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # Index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU_5_end:
+
+ .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 split CU header.
+ .long CU_split_5_end-CU_split_5_version # Length of Unit
+CU_split_5_version:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+ .byte 1 # Abbreviation code
+ .byte 0 # The index of the producer string
+ .byte 1 # The index of the CU name string
+ .long .debug_dwo_str_offsets_base0-.debug_str_offsets.dwo
+ .byte 2 # The index of the comp dir string
+ .byte 0 # NULL
+CU_split_5_end:
+
+ .section .debug_types.dwo,"",@progbits
+
+# DWARF v5 split type unit header.
+TU_split_5_start:
+ .long TU_split_5_end-TU_split_5_version # Length of Unit
+TU_split_5_version:
+ .short 5 # DWARF version number
+ .byte 6 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev.dwo # Offset Into Abbrev Section
+ .quad 0x8899aabbccddeeff # Type Signature
+ .long TU_split_5_type-TU_split_5_start # Type offset
+# The type-unit DIE, which has a name.
+ .byte 2 # Abbreviation code
+ .byte 0 # The index of the type unit name string
+ .long .debug_dwo_str_offsets_base1-.debug_str_offsets.dwo
+# The type DIE, which has a name.
+TU_split_5_type:
+ .byte 3 # Abbreviation code
+ .byte 1 # The index of the type name string
+ .byte 0 # NULL
+ .byte 0 # NULL
+TU_split_5_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o
new file mode 100644
index 000000000000..e15ac1c1542f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-flags.test b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
index 9233e20ee82e..02291c9afa9c 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-flags.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
@@ -1,7 +1,7 @@
-; RUN: llvm-pdbdump pretty %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
-; RUN: llvm-pdbdump pretty -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
-; RUN: llvm-pdbdump pretty -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
+; RUN: llvm-pdbutil pretty %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
+; RUN: llvm-pdbutil pretty -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
+; RUN: llvm-pdbutil pretty -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
; Check that neither symbols nor compilands are dumped when neither argument specified.
; NO_ARGS: empty.pdb
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test b/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
index 2a596e4af149..9556fb72edc5 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
@@ -1,5 +1,5 @@
-; RUN: llvm-pdbdump pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS_FPO %s
-; RUN: llvm-pdbdump pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS %s
+; RUN: llvm-pdbutil pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS_FPO %s
+; RUN: llvm-pdbutil pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS %s
; LINE_NUMS_FPO: llvm\test\debuginfo\pdb\inputs\symbolformat-fpo.cpp
; LINE_NUMS_FPO: Line 5, Address: [0x000011a0 - 0x000011a5] (6 bytes)
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
index 997cdd9f6bac..536161586ffc 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
@@ -1,11 +1,11 @@
-; RUN: llvm-pdbdump pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT_FPO %s
-; RUN: llvm-pdbdump pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/symbolformat.pdb > %t.types
+; RUN: llvm-pdbutil pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT_FPO %s
+; RUN: llvm-pdbutil pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/symbolformat.pdb > %t.types
; RUN: FileCheck --check-prefix=TYPES_FORMAT %s < %t.types
; RUN: FileCheck --check-prefix=TYPES_1 %s < %t.types
; RUN: FileCheck --check-prefix=TYPES_2 %s < %t.types
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
-; RUN: llvm-pdbdump pretty -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
+; RUN: llvm-pdbutil pretty -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
; The format is func [0x<rva_start>+<prologue_length> - 0x<rva_end>-<epilogue_length>]
; SYM_FORMAT_FPO: ---SYMBOLS---
diff --git a/test/DebugInfo/PDB/Inputs/debug-subsections.yaml b/test/DebugInfo/PDB/Inputs/debug-subsections.yaml
new file mode 100644
index 000000000000..ab199d2f21a0
--- /dev/null
+++ b/test/DebugInfo/PDB/Inputs/debug-subsections.yaml
@@ -0,0 +1,91 @@
+DbiStream:
+ Modules:
+ - Module: 'Foo.obj'
+ ObjFile: 'Foo.obj'
+ Subsections:
+ - !CrossModuleExports
+ Exports:
+ - LocalId: 4852
+ GlobalId: 9283
+ - LocalId: 2147487875
+ GlobalId: 9123
+ - Module: 'Bar.obj'
+ ObjFile: 'Bar.obj'
+ Subsections:
+ - !CrossModuleExports
+ Exports:
+ - LocalId: 4265
+ GlobalId: 6097
+ - LocalId: 4297
+ GlobalId: 4677
+ - !CrossModuleImports
+ Imports:
+ - Module: 'Foo.obj'
+ Imports: [ 4852, 2147487875 ]
+ - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+ ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+ SourceFiles:
+ - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+ Subsections:
+ - !FileChecksums
+ Checksums:
+ - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+ Kind: MD5
+ Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
+ - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+ Kind: MD5
+ Checksum: 1154D69F5B2650196E1FC34F4134E56B
+ - !Lines
+ CodeSize: 10
+ Flags: [ ]
+ RelocOffset: 100016
+ RelocSegment: 1
+ Blocks:
+ - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+ Lines:
+ - Offset: 0
+ LineStart: 5
+ IsStatement: true
+ EndDelta: 0
+ - Offset: 3
+ LineStart: 6
+ IsStatement: true
+ EndDelta: 0
+ - Offset: 8
+ LineStart: 7
+ IsStatement: true
+ EndDelta: 0
+ Columns:
+ - !InlineeLines
+ HasExtraFiles: false
+ Sites:
+ - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+ LineNum: 26950
+ Inlinee: 22767
+ # The following subsections don't normally appear in PDB files, but we test
+ # them anyway
+ - Module: 'ObjFileSubsections'
+ ObjFile: 'ObjFileSubsections'
+ Subsections:
+ - !StringTable
+ Strings:
+ - 'String1'
+ - 'String2'
+ - 'String3'
+ - !Symbols
+ Records:
+ - Kind: S_OBJNAME
+ ObjNameSym:
+ Signature: 0
+ ObjectName: 'ObjFileSubsections'
+ - !FrameData
+ Frames:
+ - CodeSize: 1
+ FrameFunc: 'MyFunc'
+ LocalSize: 2
+ MaxStackSize: 3
+ ParamsSize: 4
+ PrologSize: 5
+ RvaStart: 6
+ SavedRegsSize: 7
+...
diff --git a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
deleted file mode 100644
index d1324d26d8bb..000000000000
--- a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
----
-DbiStream:
- Modules:
- - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
- ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
- SourceFiles:
- - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- Subsections:
- - !FileChecksums
- Checksums:
- - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- Kind: MD5
- Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
- - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
- Kind: MD5
- Checksum: 1154D69F5B2650196E1FC34F4134E56B
- - !Lines
- CodeSize: 10
- Flags: [ ]
- RelocOffset: 16
- RelocSegment: 1
- Blocks:
- - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- Lines:
- - Offset: 0
- LineStart: 5
- IsStatement: true
- EndDelta: 0
- - Offset: 3
- LineStart: 6
- IsStatement: true
- EndDelta: 0
- - Offset: 8
- LineStart: 7
- IsStatement: true
- EndDelta: 0
- Columns:
- - !InlineeLines
- HasExtraFiles: false
- Sites:
- - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
- LineNum: 26950
- Inlinee: 22767
-...
diff --git a/test/DebugInfo/PDB/Native/pdb-native-compilands.test b/test/DebugInfo/PDB/Native/pdb-native-compilands.test
index 38234d719e50..2c7011c65708 100644
--- a/test/DebugInfo/PDB/Native/pdb-native-compilands.test
+++ b/test/DebugInfo/PDB/Native/pdb-native-compilands.test
@@ -1,7 +1,7 @@
; Test that the native PDB reader can enumerate the compilands.
-; RUN: llvm-pdbdump pretty -native -compilands %p/../Inputs/empty.pdb \
+; RUN: llvm-pdbutil pretty -native -compilands %p/../Inputs/empty.pdb \
; RUN: | FileCheck -check-prefix=EMPTY %s
-; RUN: llvm-pdbdump pretty -native -compilands %p/../Inputs/big-read.pdb \
+; RUN: llvm-pdbutil pretty -native -compilands %p/../Inputs/big-read.pdb \
; RUN: | FileCheck -check-prefix=BIGREAD %s
; Reference output was generated with the DIA reader to ensure that the
diff --git a/test/DebugInfo/PDB/Native/pdb-native-summary.test b/test/DebugInfo/PDB/Native/pdb-native-summary.test
index bd32f198a390..116d2564fda3 100644
--- a/test/DebugInfo/PDB/Native/pdb-native-summary.test
+++ b/test/DebugInfo/PDB/Native/pdb-native-summary.test
@@ -1,5 +1,5 @@
; Test that the native PDB reader gets the PDB summary correct.
-; RUN: llvm-pdbdump pretty -native -color-output=false %p/../Inputs/empty.pdb \
+; RUN: llvm-pdbutil pretty -native -color-output=false %p/../Inputs/empty.pdb \
; RUN: | FileCheck -check-prefix=EMPTY %s
; Reference output was generated with the DIA reader to ensure that the
diff --git a/test/DebugInfo/PDB/pdb-longname-truncation.test b/test/DebugInfo/PDB/pdb-longname-truncation.test
index 2e0284fbe916..06eae8ea226d 100644
--- a/test/DebugInfo/PDB/pdb-longname-truncation.test
+++ b/test/DebugInfo/PDB/pdb-longname-truncation.test
@@ -1,3 +1,3 @@
; For now just verify that this doesn't cause an error. Later we pdbdump can
; do type lookup, we can verify that the name matches what we expect.
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/longname-truncation.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/longname-truncation.yaml
diff --git a/test/DebugInfo/PDB/pdb-minimal-construct.test b/test/DebugInfo/PDB/pdb-minimal-construct.test
index d75c51056c9f..326c6cf9231b 100644
--- a/test/DebugInfo/PDB/pdb-minimal-construct.test
+++ b/test/DebugInfo/PDB/pdb-minimal-construct.test
@@ -1,11 +1,11 @@
-; This testcase verifies that we can produce a minimal PDB, while
-; serving as an example for how to construct a minimal PDB for other
-; testcases. It takes as input a small fragment of hand-written yaml
-; that specifies nothing about the PDB other than a definition of one
-; symbol that it contains. Then it produces a PDB, and uses the
-; resulting PDB to go back to yaml, and verify that the resulting yaml
-; is identical.
-
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/one-symbol.yaml
-; RUN: llvm-pdbdump pdb2yaml -minimal -dbi-module-syms -no-file-headers %t.pdb > %t.pdb.yaml
-; RUN: diff -b %p/Inputs/one-symbol.yaml %t.pdb.yaml
+; This testcase verifies that we can produce a minimal PDB, while
+; serving as an example for how to construct a minimal PDB for other
+; testcases. It takes as input a small fragment of hand-written yaml
+; that specifies nothing about the PDB other than a definition of one
+; symbol that it contains. Then it produces a PDB, and uses the
+; resulting PDB to go back to yaml, and verify that the resulting yaml
+; is identical.
+
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/one-symbol.yaml
+; RUN: llvm-pdbutil pdb2yaml -minimal -module-syms -no-file-headers %t.pdb > %t.pdb.yaml
+; RUN: diff -b %p/Inputs/one-symbol.yaml %t.pdb.yaml
diff --git a/test/DebugInfo/PDB/pdb-yaml-symbols.test b/test/DebugInfo/PDB/pdb-yaml-symbols.test
index e3cdcb6ababb..574065176b5b 100644
--- a/test/DebugInfo/PDB/pdb-yaml-symbols.test
+++ b/test/DebugInfo/PDB/pdb-yaml-symbols.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pdb2yaml -dbi-module-syms %p/Inputs/empty.pdb \
+; RUN: llvm-pdbutil pdb2yaml -module-syms %p/Inputs/empty.pdb \
; RUN: | FileCheck -check-prefix=YAML %s
diff --git a/test/DebugInfo/PDB/pdb-yaml-types.test b/test/DebugInfo/PDB/pdb-yaml-types.test
index b3108591271e..f65d9edaa549 100644
--- a/test/DebugInfo/PDB/pdb-yaml-types.test
+++ b/test/DebugInfo/PDB/pdb-yaml-types.test
@@ -1,7 +1,7 @@
-RUN: llvm-pdbdump pdb2yaml -tpi-stream %p/Inputs/big-read.pdb > %t.yaml
+RUN: llvm-pdbutil pdb2yaml -tpi-stream %p/Inputs/big-read.pdb > %t.yaml
RUN: FileCheck -check-prefix=YAML %s < %t.yaml
-RUN: llvm-pdbdump yaml2pdb %t.yaml -pdb %t.pdb
-RUN: llvm-pdbdump raw -tpi-records %t.pdb | FileCheck %s --check-prefix=PDB
+RUN: llvm-pdbutil yaml2pdb %t.yaml -pdb %t.pdb
+RUN: llvm-pdbutil raw -tpi-records %t.pdb | FileCheck %s --check-prefix=PDB
Only verify the beginning of the type stream.
diff --git a/test/DebugInfo/PDB/pdbdump-debug-subsections.test b/test/DebugInfo/PDB/pdbdump-debug-subsections.test
new file mode 100644
index 000000000000..52f7bb52da2a
--- /dev/null
+++ b/test/DebugInfo/PDB/pdbdump-debug-subsections.test
@@ -0,0 +1,210 @@
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/debug-subsections.yaml
+; RUN: llvm-pdbutil pdb2yaml -all -no-file-headers %t.pdb | FileCheck --check-prefix=YAML %s
+; RUN: llvm-pdbutil raw -subsections=all %t.pdb | FileCheck --check-prefix=RAW %s
+
+YAML: Modules:
+YAML-NEXT: - Module: Foo.obj
+YAML-NEXT: ObjFile: Foo.obj
+YAML-NEXT: Subsections:
+YAML-NEXT: - !CrossModuleExports
+YAML-NEXT: Exports:
+YAML-NEXT: - LocalId: 4852
+YAML-NEXT: GlobalId: 9283
+YAML-NEXT: - LocalId: 2147487875
+YAML-NEXT: GlobalId: 9123
+YAML: - Module: Bar.obj
+YAML-NEXT: ObjFile: Bar.obj
+YAML-NEXT: Subsections:
+YAML-NEXT: - !CrossModuleExports
+YAML-NEXT: Exports:
+YAML-NEXT: - LocalId: 4265
+YAML-NEXT: GlobalId: 6097
+YAML-NEXT: - LocalId: 4297
+YAML-NEXT: GlobalId: 4677
+YAML-NEXT: - !CrossModuleImports
+YAML-NEXT: Imports:
+YAML-NEXT: - Module: Foo.obj
+YAML-NEXT: Imports: [ 4852, 2147487875 ]
+YAML: - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML-NEXT: ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML-NEXT: SourceFiles:
+YAML-NEXT: - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT: Subsections:
+YAML-NEXT: - !FileChecksums
+YAML-NEXT: Checksums:
+YAML-NEXT: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT: Kind: MD5
+YAML-NEXT: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
+YAML-NEXT: - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+YAML-NEXT: Kind: MD5
+YAML-NEXT: Checksum: 1154D69F5B2650196E1FC34F4134E56B
+YAML-NEXT: - !Lines
+YAML-NEXT: CodeSize: 10
+YAML-NEXT: Flags: [ ]
+YAML-NEXT: RelocOffset: 100016
+YAML-NEXT: RelocSegment: 1
+YAML-NEXT: Blocks:
+YAML-NEXT: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT: Lines:
+YAML-NEXT: - Offset: 0
+YAML-NEXT: LineStart: 5
+YAML-NEXT: IsStatement: true
+YAML-NEXT: EndDelta: 0
+YAML-NEXT: - Offset: 3
+YAML-NEXT: LineStart: 6
+YAML-NEXT: IsStatement: true
+YAML-NEXT: EndDelta: 0
+YAML-NEXT: - Offset: 8
+YAML-NEXT: LineStart: 7
+YAML-NEXT: IsStatement: true
+YAML-NEXT: EndDelta: 0
+YAML-NEXT: Columns:
+YAML-NEXT: - !InlineeLines
+YAML-NEXT: HasExtraFiles: false
+YAML-NEXT: Sites:
+YAML-NEXT: - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+YAML-NEXT: LineNum: 26950
+YAML-NEXT: Inlinee: 22767
+
+
+RAW: DBI Stream {
+RAW: Modules [
+RAW-NEXT: {
+RAW-NEXT: Name: Foo.obj
+RAW: Subsections [
+RAW-NEXT: CrossModuleExports [
+RAW-NEXT: Export {
+RAW-NEXT: Local: 0x12F4
+RAW-NEXT: Global: 0x2443
+RAW-NEXT: }
+RAW-NEXT: Export {
+RAW-NEXT: Local: 0x80001083
+RAW-NEXT: Global: 0x23A3
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: ]
+RAW-NEXT: }
+RAW-NEXT: {
+RAW-NEXT: Name: Bar.obj
+RAW: Subsections [
+RAW-NEXT: CrossModuleExports [
+RAW-NEXT: Export {
+RAW-NEXT: Local: 0x10A9
+RAW-NEXT: Global: 0x17D1
+RAW-NEXT: }
+RAW-NEXT: Export {
+RAW-NEXT: Local: 0x10C9
+RAW-NEXT: Global: 0x1245
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: CrossModuleImports [
+RAW-NEXT: ModuleImport {
+RAW-NEXT: Module: Foo.obj
+RAW-NEXT: Imports: [0x12F4, 0x80001083]
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: ]
+RAW-NEXT: }
+RAW-NEXT: {
+RAW-NEXT: Name: d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj
+RAW: Subsections [
+RAW-NEXT: FileChecksums {
+RAW-NEXT: Checksum {
+RAW-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+RAW-NEXT: Kind: MD5 (0x1)
+RAW-NEXT: Checksum (
+RAW-NEXT: 0000: A0A5BD0D 3ECD93FC 29D19DE8 26FBF4BC |....>...)...&...|
+RAW-NEXT: )
+RAW-NEXT: }
+RAW-NEXT: Checksum {
+RAW-NEXT: FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+RAW-NEXT: Kind: MD5 (0x1)
+RAW-NEXT: Checksum (
+RAW-NEXT: 0000: 1154D69F 5B265019 6E1FC34F 4134E56B |.T..[&P.n..OA4.k|
+RAW-NEXT: )
+RAW-NEXT: }
+RAW-NEXT: }
+RAW-NEXT: Lines {
+RAW-NEXT: RelocSegment: 1
+RAW-NEXT: RelocOffset: 100016
+RAW-NEXT: CodeSize: 10
+RAW-NEXT: HasColumns: No
+RAW-NEXT: FileEntry {
+RAW-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+RAW-NEXT: Line {
+RAW-NEXT: Offset: 0
+RAW-NEXT: LineNumberStart: 5
+RAW-NEXT: EndDelta: 0
+RAW-NEXT: IsStatement: Yes
+RAW-NEXT: }
+RAW-NEXT: Line {
+RAW-NEXT: Offset: 3
+RAW-NEXT: LineNumberStart: 6
+RAW-NEXT: EndDelta: 0
+RAW-NEXT: IsStatement: Yes
+RAW-NEXT: }
+RAW-NEXT: Line {
+RAW-NEXT: Offset: 8
+RAW-NEXT: LineNumberStart: 7
+RAW-NEXT: EndDelta: 0
+RAW-NEXT: IsStatement: Yes
+RAW-NEXT: }
+RAW-NEXT: }
+RAW-NEXT: }
+RAW-NEXT: InlineeLines {
+RAW-NEXT: HasExtraFiles: No
+RAW-NEXT: Lines [
+RAW-NEXT: Inlinee {
+RAW-NEXT: FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+RAW-NEXT: Function {
+RAW-NEXT: Index: 0x58ef (unknown function)
+RAW-NEXT: }
+RAW-NEXT: SourceLine: 26950
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: }
+RAW-NEXT: {
+RAW-NEXT: Name: ObjFileSubsections
+RAW-NEXT: Debug Stream Index: 11
+RAW-NEXT: Object File Name: ObjFileSubsections
+RAW-NEXT: Num Files: 0
+RAW-NEXT: Source File Name Idx: 0
+RAW-NEXT: Pdb File Name Idx: 0
+RAW-NEXT: Line Info Byte Size: 0
+RAW-NEXT: C13 Line Info Byte Size: 116
+RAW-NEXT: Symbol Byte Size: 4
+RAW-NEXT: Type Server Index: 0
+RAW-NEXT: Has EC Info: No
+RAW-NEXT: Subsections [
+RAW-NEXT: String Table [
+RAW-NEXT: String1
+RAW-NEXT: String2
+RAW-NEXT: String3
+RAW-NEXT: ]
+RAW-NEXT: Symbols [
+RAW-NEXT: {
+RAW-NEXT: ObjectName {
+RAW-NEXT: Signature: 0x0
+RAW-NEXT: ObjectName: ObjFileSubsections
+RAW-NEXT: }
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: FrameData [
+RAW-NEXT: Frame {
+RAW-NEXT: Rva: 6
+RAW-NEXT: CodeSize: 1
+RAW-NEXT: LocalSize: 2
+RAW-NEXT: ParamsSize: 4
+RAW-NEXT: MaxStackSize: 3
+RAW-NEXT: FrameFunc: MyFunc
+RAW-NEXT: PrologSize: 5
+RAW-NEXT: SavedRegsSize: 7
+RAW-NEXT: Flags: 0
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: ]
+RAW-NEXT: }
+RAW-NEXT: ]
+RAW-NEXT: }
diff --git a/test/DebugInfo/PDB/pdbdump-headers.test b/test/DebugInfo/PDB/pdbdump-headers.test
index 4e6bb75f8b8d..82fe91dd20aa 100644
--- a/test/DebugInfo/PDB/pdbdump-headers.test
+++ b/test/DebugInfo/PDB/pdbdump-headers.test
@@ -1,12 +1,12 @@
-; RUN: llvm-pdbdump raw -headers -string-table -tpi-records -tpi-record-bytes -module-syms \
+; RUN: llvm-pdbutil raw -headers -string-table -tpi-records -tpi-record-bytes -module-syms \
; RUN: -sym-record-bytes -globals -publics -module-files \
; RUN: -stream-summary -stream-blocks -ipi-records -ipi-record-bytes \
-; RUN: -section-contribs -section-map -section-headers -line-info \
+; RUN: -section-contribs -section-map -section-headers -subsections=all \
; RUN: -tpi-hash -fpo -page-stats %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
-; RUN: llvm-pdbdump raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
-; RUN: llvm-pdbdump raw -headers -modules -module-files \
+; RUN: llvm-pdbutil raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
+; RUN: llvm-pdbutil raw -headers -modules -module-files \
; RUN: %p/Inputs/big-read.pdb | FileCheck -check-prefix=BIG %s
-; RUN: not llvm-pdbdump raw -headers %p/Inputs/bad-block-size.pdb 2>&1 | FileCheck -check-prefix=BAD-BLOCK-SIZE %s
+; RUN: not llvm-pdbutil raw -headers %p/Inputs/bad-block-size.pdb 2>&1 | FileCheck -check-prefix=BAD-BLOCK-SIZE %s
; EMPTY: FileHeaders {
; EMPTY-NEXT: BlockSize: 4096
@@ -484,7 +484,34 @@
; EMPTY-NEXT: )
; EMPTY-NEXT: }
; EMPTY-NEXT: ]
-; EMPTY-NEXT: LineInfo [
+; EMPTY-NEXT: Subsections [
+; EMPTY-NEXT: Lines {
+; EMPTY-NEXT: RelocSegment: 1
+; EMPTY-NEXT: RelocOffset: 16
+; EMPTY-NEXT: CodeSize: 10
+; EMPTY-NEXT: HasColumns: No
+; EMPTY-NEXT: FileEntry {
+; EMPTY-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+; EMPTY-NEXT: Line {
+; EMPTY-NEXT: Offset: 0
+; EMPTY-NEXT: LineNumberStart: 5
+; EMPTY-NEXT: EndDelta: 0
+; EMPTY-NEXT: IsStatement: Yes
+; EMPTY-NEXT: }
+; EMPTY-NEXT: Line {
+; EMPTY-NEXT: Offset: 3
+; EMPTY-NEXT: LineNumberStart: 6
+; EMPTY-NEXT: EndDelta: 0
+; EMPTY-NEXT: IsStatement: Yes
+; EMPTY-NEXT: }
+; EMPTY-NEXT: Line {
+; EMPTY-NEXT: Offset: 8
+; EMPTY-NEXT: LineNumberStart: 7
+; EMPTY-NEXT: EndDelta: 0
+; EMPTY-NEXT: IsStatement: Yes
+; EMPTY-NEXT: }
+; EMPTY-NEXT: }
+; EMPTY-NEXT: }
; EMPTY-NEXT: FileChecksums {
; EMPTY-NEXT: Checksum {
; EMPTY-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
@@ -494,35 +521,6 @@
; EMPTY-NEXT: )
; EMPTY-NEXT: }
; EMPTY-NEXT: }
-; EMPTY-NEXT: Lines {
-; EMPTY-NEXT: Block {
-; EMPTY-NEXT: RelocSegment: 1
-; EMPTY-NEXT: RelocOffset: 16
-; EMPTY-NEXT: CodeSize: 10
-; EMPTY-NEXT: HasColumns: No
-; EMPTY-NEXT: Lines {
-; EMPTY-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-; EMPTY-NEXT: Line {
-; EMPTY-NEXT: Offset: 0
-; EMPTY-NEXT: LineNumberStart: 5
-; EMPTY-NEXT: EndDelta: 0
-; EMPTY-NEXT: IsStatement: Yes
-; EMPTY-NEXT: }
-; EMPTY-NEXT: Line {
-; EMPTY-NEXT: Offset: 3
-; EMPTY-NEXT: LineNumberStart: 6
-; EMPTY-NEXT: EndDelta: 0
-; EMPTY-NEXT: IsStatement: Yes
-; EMPTY-NEXT: }
-; EMPTY-NEXT: Line {
-; EMPTY-NEXT: Offset: 8
-; EMPTY-NEXT: LineNumberStart: 7
-; EMPTY-NEXT: EndDelta: 0
-; EMPTY-NEXT: IsStatement: Yes
-; EMPTY-NEXT: }
-; EMPTY-NEXT: }
-; EMPTY-NEXT: }
-; EMPTY-NEXT: }
; EMPTY-NEXT: ]
; EMPTY-NEXT: }
; EMPTY-NEXT: {
@@ -757,7 +755,7 @@
; EMPTY-NEXT: )
; EMPTY-NEXT: }
; EMPTY-NEXT: ]
-; EMPTY-NEXT: LineInfo [
+; EMPTY-NEXT: Subsections [
; EMPTY-NEXT: ]
; EMPTY-NEXT: }
; EMPTY-NEXT: ]
diff --git a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
index ac32ce040b98..2639490f542a 100644
--- a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
+++ b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
@@ -1,12 +1,12 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-and-types-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-and-types-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-TYPES %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=INTMAIN %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=VOIDMAIN %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-TYPES %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-NAMES %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-UDT %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-and-types-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-and-types-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-TYPES %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=INTMAIN %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=VOIDMAIN %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-TYPES %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-NAMES %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-UDT %s
TPI-TYPES: Type Info Stream (TPI)
TPI-TYPES: Record count: 9
diff --git a/test/DebugInfo/PDB/pdbdump-mergeids.test b/test/DebugInfo/PDB/pdbdump-mergeids.test
index 6a4d19eba042..1c0a8704af2a 100644
--- a/test/DebugInfo/PDB/pdbdump-mergeids.test
+++ b/test/DebugInfo/PDB/pdbdump-mergeids.test
@@ -1,9 +1,9 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=SUBSTRS %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-EMPTY %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=SUBSTRS %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-EMPTY %s
MERGED: Type Info Stream (IPI)
diff --git a/test/DebugInfo/PDB/pdbdump-mergetypes.test b/test/DebugInfo/PDB/pdbdump-mergetypes.test
index a26b92631828..8d32b4d176f2 100644
--- a/test/DebugInfo/PDB/pdbdump-mergetypes.test
+++ b/test/DebugInfo/PDB/pdbdump-mergetypes.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-types-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-types-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=ARGLIST %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-types-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-types-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=ARGLIST %s
MERGED: Type Info Stream (TPI)
diff --git a/test/DebugInfo/PDB/pdbdump-raw-blocks.test b/test/DebugInfo/PDB/pdbdump-raw-blocks.test
index b43df970e5d5..14e1f86fc029 100644
--- a/test/DebugInfo/PDB/pdbdump-raw-blocks.test
+++ b/test/DebugInfo/PDB/pdbdump-raw-blocks.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump raw -block-data=0 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK0 %s
-; RUN: llvm-pdbdump raw -block-data=0-1 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK01 %s
-; RUN: not llvm-pdbdump raw -block-data=0,1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
-; RUN: not llvm-pdbdump raw -block-data=0a1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
-; RUN: not llvm-pdbdump raw -block-data=0- %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: llvm-pdbutil raw -block-data=0 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK0 %s
+; RUN: llvm-pdbutil raw -block-data=0-1 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK01 %s
+; RUN: not llvm-pdbutil raw -block-data=0,1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: not llvm-pdbutil raw -block-data=0a1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: not llvm-pdbutil raw -block-data=0- %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
BLOCK0: Block Data {
BLOCK0-NEXT: Block 0 (
diff --git a/test/DebugInfo/PDB/pdbdump-raw-stream.test b/test/DebugInfo/PDB/pdbdump-raw-stream.test
index 6b6624f16015..846960a0964a 100644
--- a/test/DebugInfo/PDB/pdbdump-raw-stream.test
+++ b/test/DebugInfo/PDB/pdbdump-raw-stream.test
@@ -1,5 +1,5 @@
-; RUN: llvm-pdbdump raw -stream-data=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=STREAM1 %s
-; RUN: not llvm-pdbdump raw -stream-data=100 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=INVALIDSTREAM %s
+; RUN: llvm-pdbutil raw -stream-data=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=STREAM1 %s
+; RUN: not llvm-pdbutil raw -stream-data=100 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=INVALIDSTREAM %s
STREAM1: Stream Data {
STREAM1-NEXT: Stream {
diff --git a/test/DebugInfo/PDB/pdbdump-readwrite.test b/test/DebugInfo/PDB/pdbdump-readwrite.test
index 4756faf68c2d..ee53f3b4cd2a 100644
--- a/test/DebugInfo/PDB/pdbdump-readwrite.test
+++ b/test/DebugInfo/PDB/pdbdump-readwrite.test
@@ -1,10 +1,10 @@
-RUN: llvm-pdbdump pdb2yaml -dbi-module-info -dbi-module-source-info \
-RUN: -dbi-stream -pdb-stream -string-table -tpi-stream -stream-directory \
+RUN: llvm-pdbutil pdb2yaml -modules -module-files -dbi-stream \
+RUN: -pdb-stream -string-table -tpi-stream -stream-directory \
RUN: -stream-metadata %p/Inputs/empty.pdb > %t.1
-RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1
+RUN: llvm-pdbutil yaml2pdb -pdb=%t.2 %t.1
-RUN: llvm-pdbdump raw -headers -string-table -tpi-records %p/Inputs/empty.pdb | FileCheck %s
-RUN: llvm-pdbdump raw -headers -string-table -tpi-records %t.2 | FileCheck %s
+RUN: llvm-pdbutil raw -headers -string-table -tpi-records %p/Inputs/empty.pdb | FileCheck %s
+RUN: llvm-pdbutil raw -headers -string-table -tpi-records %t.2 | FileCheck %s
CHECK: FileHeaders {
CHECK-NEXT: BlockSize: 4096
diff --git a/test/DebugInfo/PDB/pdbdump-source-names.test b/test/DebugInfo/PDB/pdbdump-source-names.test
index 181f4d5e0ee4..f16a2699a154 100644
--- a/test/DebugInfo/PDB/pdbdump-source-names.test
+++ b/test/DebugInfo/PDB/pdbdump-source-names.test
@@ -6,11 +6,11 @@
# that differ by one byte, so that at least one of those will only
# pass if alignment is implemented correctly.
-RUN: llvm-pdbdump yaml2pdb -pdb=%T/source-names-1.pdb %p/Inputs/source-names-1.yaml
-RUN: llvm-pdbdump pdb2yaml -dbi-module-source-info %T/source-names-1.pdb \
+RUN: llvm-pdbutil yaml2pdb -pdb=%T/source-names-1.pdb %p/Inputs/source-names-1.yaml
+RUN: llvm-pdbutil pdb2yaml -module-files %T/source-names-1.pdb \
RUN: | FileCheck -check-prefix=CHECK1 %s
-RUN: llvm-pdbdump yaml2pdb -pdb=%T/source-names-2.pdb %p/Inputs/source-names-2.yaml
-RUN: llvm-pdbdump pdb2yaml -dbi-module-source-info %T/source-names-2.pdb \
+RUN: llvm-pdbutil yaml2pdb -pdb=%T/source-names-2.pdb %p/Inputs/source-names-2.yaml
+RUN: llvm-pdbutil pdb2yaml -module-files %T/source-names-2.pdb \
RUN: | FileCheck -check-prefix=CHECK2 %s
CHECK1: SourceFiles:
diff --git a/test/DebugInfo/PDB/pdbdump-write.test b/test/DebugInfo/PDB/pdbdump-write.test
index 393473a53af1..b6d1959abe6a 100644
--- a/test/DebugInfo/PDB/pdbdump-write.test
+++ b/test/DebugInfo/PDB/pdbdump-write.test
@@ -10,11 +10,11 @@
; stream metadata, since the layout of the MSF file might be different
; (for example if we don't write the entire stream)
;
-; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory \
-; RUN: -pdb-stream -tpi-stream -dbi-module-syms %p/Inputs/empty.pdb > %t.1
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1
-; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN: -dbi-module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3
-; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN: -dbi-module-syms -no-file-headers %t.2 > %t.4
+; RUN: llvm-pdbutil pdb2yaml -stream-metadata -stream-directory \
+; RUN: -pdb-stream -tpi-stream -module-syms %p/Inputs/empty.pdb > %t.1
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2 %t.1
+; RUN: llvm-pdbutil pdb2yaml -pdb-stream -tpi-stream \
+; RUN: -module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3
+; RUN: llvm-pdbutil pdb2yaml -pdb-stream -tpi-stream \
+; RUN: -module-syms -no-file-headers %t.2 > %t.4
; RUN: diff %t.3 %t.4
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
deleted file mode 100644
index 1d63c85352aa..000000000000
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
+++ /dev/null
@@ -1,71 +0,0 @@
-; This testcase verifies that we can produce a PDB with line
-; information. It does this by describing some line information
-; manually in YAML, creating a PDB out of it, then dumping then
-; line information from the resulting PDB.
-
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/simple-line-info.yaml
-; RUN: llvm-pdbdump raw -line-info %t.pdb | FileCheck -check-prefix=LINES %s
-
-LINES: Modules [
-LINES-NEXT: {
-LINES-NEXT: Name: d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj
-LINES: LineInfo [
-LINES-NEXT: FileChecksums {
-LINES-NEXT: Checksum {
-LINES-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-LINES-NEXT: Kind: MD5 (0x1)
-LINES-NEXT: Checksum (
-LINES-NEXT: 0000: A0A5BD0D 3ECD93FC 29D19DE8 26FBF4BC |....>...)...&...|
-LINES-NEXT: )
-LINES-NEXT: }
-LINES-NEXT: Checksum {
-LINES-NEXT: FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
-LINES-NEXT: Kind: MD5 (0x1)
-LINES-NEXT: Checksum (
-LINES-NEXT: 0000: 1154D69F 5B265019 6E1FC34F 4134E56B |.T..[&P.n..OA4.k|
-LINES-NEXT: )
-LINES-NEXT: }
-LINES-NEXT: }
-LINES-NEXT: Lines {
-LINES-NEXT: Block {
-LINES-NEXT: RelocSegment: 1
-LINES-NEXT: RelocOffset: 16
-LINES-NEXT: CodeSize: 10
-LINES-NEXT: HasColumns: No
-LINES-NEXT: Lines {
-LINES-NEXT: FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-LINES-NEXT: Line {
-LINES-NEXT: Offset: 0
-LINES-NEXT: LineNumberStart: 5
-LINES-NEXT: EndDelta: 0
-LINES-NEXT: IsStatement: Yes
-LINES-NEXT: }
-LINES-NEXT: Line {
-LINES-NEXT: Offset: 3
-LINES-NEXT: LineNumberStart: 6
-LINES-NEXT: EndDelta: 0
-LINES-NEXT: IsStatement: Yes
-LINES-NEXT: }
-LINES-NEXT: Line {
-LINES-NEXT: Offset: 8
-LINES-NEXT: LineNumberStart: 7
-LINES-NEXT: EndDelta: 0
-LINES-NEXT: IsStatement: Yes
-LINES-NEXT: }
-LINES-NEXT: }
-LINES-NEXT: }
-LINES-NEXT: }
-LINES-NEXT: InlineeLines {
-LINES-NEXT: HasExtraFiles: No
-LINES-NEXT: Lines [
-LINES-NEXT: Inlinee {
-LINES-NEXT: FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
-LINES-NEXT: Function {
-LINES-NEXT: Index: 0x58ef (unknown function)
-LINES-NEXT: }
-LINES-NEXT: SourceLine: 26950
-LINES-NEXT: }
-LINES-NEXT: ]
-LINES-NEXT: }
-LINES-NEXT: ]
-LINES-NEXT: }
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
deleted file mode 100644
index f959805c7474..000000000000
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llvm-pdbdump pdb2yaml -dbi-module-lines %p/Inputs/empty.pdb \
-; RUN: | FileCheck -check-prefix=YAML %s
-
-
-YAML: ---
-YAML: MSF:
-YAML: SuperBlock:
-YAML: BlockSize: 4096
-YAML: FreeBlockMap: 2
-YAML: NumBlocks: 25
-YAML: NumDirectoryBytes: 136
-YAML: Unknown1: 0
-YAML: BlockMapAddr: 24
-YAML: NumDirectoryBlocks: 1
-YAML: DirectoryBlocks: [ 23 ]
-YAML: NumStreams: 0
-YAML: FileSize: 102400
-YAML: DbiStream:
-YAML: VerHeader: V70
-YAML: Age: 1
-YAML: BuildNumber: 35840
-YAML: PdbDllVersion: 31101
-YAML: PdbDllRbld: 0
-YAML: Flags: 1
-YAML: MachineType: x86
-YAML: Modules:
-YAML: - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML: ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML: SourceFiles:
-YAML: - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML: Subsections:
-YAML: - !Lines
-YAML: CodeSize: 10
-YAML: Flags: [ ]
-YAML: RelocOffset: 16
-YAML: RelocSegment: 1
-YAML: Blocks:
-YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML: Lines:
-YAML: - Offset: 0
-YAML: LineStart: 5
-YAML: IsStatement: true
-YAML: EndDelta: 0
-YAML: - Offset: 3
-YAML: LineStart: 6
-YAML: IsStatement: true
-YAML: EndDelta: 0
-YAML: - Offset: 8
-YAML: LineStart: 7
-YAML: IsStatement: true
-YAML: EndDelta: 0
-YAML: Columns:
-YAML: - !FileChecksums
-YAML: Checksums:
-YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML: Kind: MD5
-YAML: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
-YAML: - Module: '* Linker *'
-YAML: ObjFile: ''
-YAML: ... \ No newline at end of file
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-types.test b/test/DebugInfo/PDB/pdbdump-yaml-types.test
index 7e6fcc1ca420..a01edcee1e99 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml-types.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml-types.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pdb2yaml -tpi-stream %p/Inputs/empty.pdb \
+; RUN: llvm-pdbutil pdb2yaml -tpi-stream %p/Inputs/empty.pdb \
; RUN: | FileCheck -check-prefix=YAML %s
YAML: ---
diff --git a/test/DebugInfo/PDB/pdbdump-yaml.test b/test/DebugInfo/PDB/pdbdump-yaml.test
index 44025be5bca7..0563230cf47c 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml.test
@@ -1,6 +1,6 @@
-; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory -string-table -pdb-stream \
+; RUN: llvm-pdbutil pdb2yaml -stream-metadata -stream-directory -string-table -pdb-stream \
; RUN: %p/Inputs/empty.pdb | FileCheck -check-prefix=YAML %s
-; RUN: llvm-pdbdump pdb2yaml -no-file-headers -stream-metadata -stream-directory -pdb-stream \
+; RUN: llvm-pdbutil pdb2yaml -no-file-headers -stream-metadata -stream-directory -pdb-stream \
; RUN: %p/Inputs/empty.pdb | FileCheck -check-prefix=NO-HEADERS %s
; YAML: ---
diff --git a/test/DebugInfo/dwarfdump-str-offsets-invalid.test b/test/DebugInfo/dwarfdump-str-offsets-invalid.test
new file mode 100644
index 000000000000..45916d28de0b
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-str-offsets-invalid.test
@@ -0,0 +1,24 @@
+; Verify that llvm-dwarfdump handles invalid string offset tables.
+
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o | \
+RUN: FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o | \
+RUN: FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o | \
+RUN: FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o | \
+RUN: FileCheck --check-prefix=INVALIDLENGTH %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o | \
+RUN: FileCheck --check-prefix=INVALIDSECTIONLENGTH %s
+
+INVALIDCONTRIB: .debug_str_offsets contents:
+INVALIDCONTRIB-NOT: contents:
+INVALIDCONTRIB: error: invalid contribution to string offsets table in section .debug_str_offsets.
+
+INVALIDLENGTH: .debug_str_offsets contents:
+INVALIDLENGTH-NOT: contents:
+INVALIDLENGTH: error: contribution to string offsets table in section .debug_str_offsets has invalid length.
+
+INVALIDSECTIONLENGTH: .debug_str_offsets contents:
+INVALIDSECTIONLENGTH-NOT: contents:
+INVALIDSECTIONLENGTH: error: size of .debug_str_offsets is not a multiple of 4.
diff --git a/test/DebugInfo/dwarfdump-str-offsets.test b/test/DebugInfo/dwarfdump-str-offsets.test
new file mode 100644
index 000000000000..937c9c4d6ece
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-str-offsets.test
@@ -0,0 +1,76 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets.x86_64.o | FileCheck %s
+
+; We are using a hand-constructed object file and are interested in the correct
+; display of the DW_AT_str_offsets_base attribute, the correct display of strings
+; and the dump of the .debug_str_offsets[.dwo] table.
+;
+; Abbreviation for DW_AT_str_offsets_base
+CHECK: .debug_abbrev contents:
+CHECK-NOT: contents:
+CHECK: DW_TAG_compile_unit
+CHECK-NOT: DW_TAG
+CHECK: DW_AT_str_offsets_base DW_FORM_sec_offset
+
+; Verify that strings are displayed correctly as indexed strings
+CHECK: .debug_info contents:
+CHECK-NOT: contents:
+CHECK: DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_1")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU1")
+
+; Second compile unit (b.cpp)
+CHECK: DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_2")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000001c)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU2")
+
+; The split CU
+CHECK: .debug_info.dwo contents:
+CHECK-NOT: contents:
+CHECK: DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade split DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "V5_split_compile_unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/splitCU")
+
+; The type unit
+CHECK: .debug_types contents:
+CHECK: DW_TAG_type_unit
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000000) string = "Type_Unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000030)
+CHECK: DW_TAG_structure_type
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "MyStruct")
+
+; The split type unit
+CHECK: .debug_types.dwo contents:
+CHECK: DW_TAG_type_unit
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000000) string = "V5_split_type_unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000001c)
+CHECK: DW_TAG_structure_type
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "V5_split_Mystruct")
+
+; The .debug_str_offsets section
+CHECK: .debug_str_offsets contents:
+CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x00000008: 00000000 "Handmade DWARF producer"
+CHECK-NEXT: 0x0000000c: 00000018 "Compile_Unit_1"
+CHECK-NEXT: 0x00000010: 00000027 "/home/test/CU1"
+CHECK-NEXT: 0x00000014: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x0000001c: 00000000 "Handmade DWARF producer"
+CHECK-NEXT: 0x00000020: 00000036 "Compile_Unit_2"
+CHECK-NEXT: 0x00000024: 00000045 "/home/test/CU2"
+CHECK-NEXT: 0x00000028: Contribution size = 8, Version = 5
+CHECK-NEXT: 0x00000030: 00000054 "Type_Unit"
+CHECK-NEXT: 0x00000034: 0000005e "MyStruct"
+
+CHECK: .debug_str_offsets.dwo contents:
+CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x00000008: 00000000 "Handmade split DWARF producer"
+CHECK-NEXT: 0x0000000c: 0000001e "V5_split_compile_unit"
+CHECK-NEXT: 0x00000010: 00000034 "/home/test/splitCU"
+CHECK-NEXT: 0x00000014: Contribution size = 8, Version = 5
+CHECK-NEXT: 0x0000001c: 00000047 "V5_split_type_unit"
+CHECK-NEXT: 0x00000020: 0000005a "V5_split_Mystruct"
diff --git a/test/FileCheck/check-dag.txt b/test/FileCheck/check-dag.txt
index 2b5a47551e83..7c5a1d18292e 100644
--- a/test/FileCheck/check-dag.txt
+++ b/test/FileCheck/check-dag.txt
@@ -12,6 +12,10 @@ add r11, r3, r4
add r10, r1, r2
mul r5, r10, r11
+# begin
+# end
+xor
+
; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2
; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
; CHECK: mul r5, [[REG1]], [[REG2]]
@@ -24,3 +28,8 @@ mul r5, r10, r11
; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
; CHECK-NOT: xor
; CHECK-DAG: mul r5, [[REG1]], [[REG2]]
+
+; CHECK-DAG: begin
+; CHECK-NOT: xor
+; CHECK-DAG: end
+; CHECK: xor
diff --git a/test/Instrumentation/MemorySanitizer/csr.ll b/test/Instrumentation/MemorySanitizer/csr.ll
index c288f93241b9..a7664d456368 100644
--- a/test/Instrumentation/MemorySanitizer/csr.ll
+++ b/test/Instrumentation/MemorySanitizer/csr.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
; RUN: opt < %s -msan -msan-check-access-address=1 -S | FileCheck %s --check-prefix=ADDR
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll b/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
index be3f1976daa1..c4ec7fa29199 100644
--- a/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_arith.ll b/test/Instrumentation/MemorySanitizer/vector_arith.ll
index 8be085cff33d..6652fdff89b0 100644
--- a/test/Instrumentation/MemorySanitizer/vector_arith.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_arith.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_cmp.ll b/test/Instrumentation/MemorySanitizer/vector_cmp.ll
index 62a5f573064e..910b1351330a 100644
--- a/test/Instrumentation/MemorySanitizer/vector_cmp.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_cmp.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
index beedb0e63e50..1dd3d7d9c68d 100644
--- a/test/Instrumentation/MemorySanitizer/vector_cvt.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_pack.ll b/test/Instrumentation/MemorySanitizer/vector_pack.ll
index deb03d84802a..574e7b890034 100644
--- a/test/Instrumentation/MemorySanitizer/vector_pack.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_pack.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_shift.ll b/test/Instrumentation/MemorySanitizer/vector_shift.ll
index a4b8fdbd603f..c605c97bba17 100644
--- a/test/Instrumentation/MemorySanitizer/vector_shift.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_shift.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
; Test instrumentation of vector shift instructions.
diff --git a/test/LTO/ARM/Inputs/thumb.ll b/test/LTO/ARM/Inputs/thumb.ll
new file mode 100644
index 000000000000..cb8c2dfa5585
--- /dev/null
+++ b/test/LTO/ARM/Inputs/thumb.ll
@@ -0,0 +1,15 @@
+target triple = "thumbv7-linux-gnueabihf"
+
+define i32 @foo(i32 %a, i32 %b) #0 {
+entry:
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+define i32 @bar(i32 %a, i32 %b) #0 {
+entry:
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+attributes #0 = { "target-features"="+thumb-mode" }
diff --git a/test/LTO/ARM/link-arm-and-thumb.ll b/test/LTO/ARM/link-arm-and-thumb.ll
new file mode 100644
index 000000000000..743e3f66194f
--- /dev/null
+++ b/test/LTO/ARM/link-arm-and-thumb.ll
@@ -0,0 +1,32 @@
+; Testcase to check that functions from a Thumb module can be inlined in an
+; ARM function.
+;
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/Inputs/thumb.ll -o %t2.bc
+; RUN: llvm-lto -exported-symbol main \
+; RUN: -exported-symbol bar \
+; RUN: -filetype=asm \
+; RUN: -o - \
+; RUN: %t1.bc %t2.bc 2> %t3.out| FileCheck %s
+; RUN: FileCheck --allow-empty --input-file %t3.out --check-prefix STDERR %s
+
+target triple = "armv7-linux-gnueabihf"
+
+; CHECK: .code 32
+; CHECK-NEXT: main
+; CHECK-NEXT: .fnstart
+; CHECK-NEXT: mov r0, #30
+
+; CHECK: .code 16
+; CHECK-NEXT: .thumb_func
+; CHECK-NEXT: bar
+
+declare i32 @foo(i32 %a, i32 %b);
+
+define i32 @main() {
+entry:
+ %add = call i32 @foo(i32 10, i32 20)
+ ret i32 %add
+}
+
+; STDERR-NOT: warning: Linking two modules of different target triples:
diff --git a/test/LTO/Resolution/X86/linker-redef.ll b/test/LTO/Resolution/X86/linker-redef.ll
new file mode 100644
index 000000000000..802a54db93c6
--- /dev/null
+++ b/test/LTO/Resolution/X86/linker-redef.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-lto2 run -o %t1.o %t.o -r %t.o,bar,pr
+; RUN: llvm-readobj -t %t1.o.0 | FileCheck %s
+
+; CHECK: Name: bar
+; CHECK-NEXT: Value:
+; CHECK-NEXT: Size:
+; CHECK-NEXT: Binding: Weak
+; CHECK-NEXT: Type: Function
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @bar() {
+ ret void
+}
diff --git a/test/Linker/Inputs/thumb.ll b/test/Linker/Inputs/thumb.ll
new file mode 100644
index 000000000000..e15fb26a8c7e
--- /dev/null
+++ b/test/Linker/Inputs/thumb.ll
@@ -0,0 +1,16 @@
+target triple = "thumbv7-linux-gnueabihf"
+
+define i32 @foo(i32 %a, i32 %b) #0 {
+entry:
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+define i32 @bar(i32 %a, i32 %b) #1 {
+entry:
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+attributes #0 = { "target-features"="-thumb-mode" }
+attributes #1 = { "target-features"="+thumb-mode" }
diff --git a/test/Linker/link-arm-and-thumb.ll b/test/Linker/link-arm-and-thumb.ll
new file mode 100644
index 000000000000..a90f2128e443
--- /dev/null
+++ b/test/Linker/link-arm-and-thumb.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/Inputs/thumb.ll -o %t2.bc
+; RUN: llvm-link %t1.bc %t2.bc -S 2> %t3.out | FileCheck %s
+; RUN: FileCheck --allow-empty --input-file %t3.out --check-prefix STDERR %s
+
+target triple = "armv7-linux-gnueabihf"
+
+declare i32 @foo(i32 %a, i32 %b);
+
+define i32 @main() {
+entry:
+ %add = call i32 @foo(i32 10, i32 20)
+ ret i32 %add
+}
+
+; CHECK: define i32 @main() {
+; CHECK: define i32 @foo(i32 %a, i32 %b) [[ARM_ATTRS:#[0-9]+]]
+; CHECK: define i32 @bar(i32 %a, i32 %b) [[THUMB_ATTRS:#[0-9]+]]
+
+; CHECK: attributes [[ARM_ATTRS]] = { "target-features"="-thumb-mode" }
+; CHECK: attributes [[THUMB_ATTRS]] = { "target-features"="+thumb-mode" }
+
+; STDERR-NOT: warning: Linking two modules of different target triples:
diff --git a/test/MC/AMDGPU/sopp-err.s b/test/MC/AMDGPU/sopp-err.s
index fac0d3222909..d65e54344031 100644
--- a/test/MC/AMDGPU/sopp-err.s
+++ b/test/MC/AMDGPU/sopp-err.s
@@ -75,16 +75,16 @@ s_sendmsg sendmsg(MSG_SYSMSG, 5)
// GCN: error: invalid/unsupported code of SYSMSG_OP
s_waitcnt lgkmcnt(16)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for lgkmcnt
s_waitcnt expcnt(8)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for expcnt
s_waitcnt vmcnt(16)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for vmcnt
s_waitcnt vmcnt(0xFFFFFFFFFFFF0000)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for vmcnt
s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0),
// GCN: error: failed parsing operand
diff --git a/test/MC/AMDGPU/sym_option.s b/test/MC/AMDGPU/sym_option.s
index 5cf97c7b435d..79e3ae5bcef9 100644
--- a/test/MC/AMDGPU/sym_option.s
+++ b/test/MC/AMDGPU/sym_option.s
@@ -10,7 +10,7 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
.byte .option.machine_version_major
-// SI: .byte 0
+// SI: .byte 6
// BONAIRE: .byte 7
// HAWAII: .byte 7
// KABINI: .byte 7
@@ -37,7 +37,7 @@
// SI: .byte 0
// BONAIRE: .byte 0
// HAWAII: .byte 1
-// KABINI: .byte 2
+// KABINI: .byte 3
// ICELAND: .byte 0
// CARRIZO: .byte 1
// TONGA: .byte 2
diff --git a/test/MC/ARM/arm-thumb-tail-call.ll b/test/MC/ARM/arm-thumb-tail-call.ll
new file mode 100644
index 000000000000..c166719505df
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-tail-call.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O0 < %s -mtriple armv7-linux-gnueabi -o - \
+; RUN: | llvm-mc -triple armv7-linux-gnueabi -filetype=obj -o - \
+; RUN: | llvm-readobj -r | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+define internal i32 @arm_fn() #1 {
+ %1 = tail call i32 @thumb_fn()
+ ret i32 %1
+}
+
+define internal i32 @thumb_fn() #2 {
+ %1 = tail call i32 @arm_fn()
+ ret i32 %1
+}
+
+attributes #1 = { "target-features"="-thumb-mode" }
+attributes #2 = { "target-features"="+thumb-mode" }
+
+; CHECK: Relocations [
+; CHECK-NEXT: Section (3) .rel.text {
+; CHECK-NEXT: 0x0 R_ARM_JUMP24 thumb_fn 0x0
+; CHECK-NEXT: 0x4 R_ARM_THM_JUMP24 arm_fn 0x0
+; CHECK-NEXT: }
diff --git a/test/MC/ARM/big-endian-thumb2-fixup.s b/test/MC/ARM/big-endian-thumb2-fixup.s
index 0aaa26a209fe..4435f6ed79e5 100644
--- a/test/MC/ARM/big-endian-thumb2-fixup.s
+++ b/test/MC/ARM/big-endian-thumb2-fixup.s
@@ -47,3 +47,9 @@ ldst_precel_12_label:
nop
adr_pcrel_12_label:
+@ARM::fixup_t2_so_imm
+.section s_t2_so_imm,"ax",%progbits
+// CHECK-LABEL: Contents of section s_t2_so_imm
+// CHECK: 0000 f1033337
+ add r3, r3,val
+.equ val,0x37373737
diff --git a/test/MC/ARM/t2-modified-immediate-fixup-error1.s b/test/MC/ARM/t2-modified-immediate-fixup-error1.s
new file mode 100644
index 000000000000..f5113a649207
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup-error1.s
@@ -0,0 +1,13 @@
+@ PR28647
+@ RUN: not llvm-mc -triple=thumbv7a-linux-gnueabi -filetype=obj < %s 2>&1 | FileCheck %s
+ .text
+ .syntax unified
+ .balign 2
+
+@ Error with unencodeable immediate
+ add r1, r2, sym0
+@ CHECK: error: out of range immediate fixup value
+ .equ sym0, 0x01abcdef
+.L2:
+ mov r0, .L2
+@ CHECK: error: unsupported relocation on symbol
diff --git a/test/MC/ARM/t2-modified-immediate-fixup-error2.s b/test/MC/ARM/t2-modified-immediate-fixup-error2.s
new file mode 100644
index 000000000000..a5672b5eb1fb
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup-error2.s
@@ -0,0 +1,12 @@
+@ PR28647
+@ RUN: not llvm-mc -triple=thumbv7a-linux-gnueabi -filetype=obj < %s 2>&1 | FileCheck %s
+ .text
+ .syntax unified
+ .balign 2
+
+@ mov with :upper16: or :lower16: should not match mov with modified immediate
+ mov r0, :upper16: sym0
+@ CHECK: error: instruction requires: arm-mode
+ mov r0, :lower16: sym0
+@ CHECK: error: instruction requires: arm-mode
+ .equ sym0, 0x01abcdef
diff --git a/test/MC/ARM/t2-modified-immediate-fixup.s b/test/MC/ARM/t2-modified-immediate-fixup.s
new file mode 100644
index 000000000000..ad0fae2e666e
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup.s
@@ -0,0 +1,45 @@
+@ PR28647
+@ RUN: llvm-mc < %s -triple=thumbv7a-linux-gnueabi -filetype=obj -o - \
+@ RUN: | llvm-objdump --disassemble -triple=thumbv7a-linux-gnueabi - | FileCheck %s
+ .text
+ .syntax unified
+ .balign 2
+@ Thumb2 modified immediate instructions
+ add r1,r1, sym0
+ sub r1,r2, sym1
+ cmp r2, sym2
+ and r4,r4, sym3
+ orr r8,r9, sym4
+ teq r1, sym5
+ tst r1, sym6
+ sbc r1,r1, sym7
+ adc r1,r0, sym8
+@CHECK: add.w r1, r1, #255
+@CHECK: sub.w r1, r2, #16711935
+@CHECK: cmp.w r2, #4278255360
+@CHECK: and r4, r4, #303174162
+@CHECK: orr r8, r9, #2852126720
+@CHECK: teq.w r1, #1426063360
+@CHECK: tst.w r1, #713031680
+@CHECK: sbc r1, r1, #2785280
+@CHECK: adc r1, r0, #340
+
+.L1:
+ sub r3, r3, #.L2 - .L1
+.L2:
+@CHECK: sub.w r3, r3, #4
+
+@ mov without :upper16: or :lower16: should match mov with modified immediate
+ mov r1, sym3
+@CHECK: mov.w r1, #303174162
+
+@ Modified immediate constants
+ .equ sym0, 0x000000ff
+ .equ sym1, 0x00ff00ff
+ .equ sym2, 0xff00ff00
+ .equ sym3, 0x12121212
+ .equ sym4, 0xaa000000
+ .equ sym5, 0x55000000
+ .equ sym6, 0x2a800000
+ .equ sym7, 0x002a8000
+ .equ sym8, 0x00000154
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index 76b4cf12626b..ca917a0502dc 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -76,10 +76,8 @@
@ CHECK-ERRORS: error: branch target out of range
foo2:
- mov r0, foo2
movw r0, foo2
movt r0, foo2
-@ CHECK-ERRORS: error: instruction requires: arm-mode
@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
@ CHECK-ERRORS: ^
@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
diff --git a/test/MC/AsmParser/empty-comment.s b/test/MC/AsmParser/empty-comment.s
new file mode 100644
index 000000000000..57df820007ca
--- /dev/null
+++ b/test/MC/AsmParser/empty-comment.s
@@ -0,0 +1,4 @@
+ #RUN: llvm-mc -preserve-comments -n -triple i386-linux-gnu < %s > %t
+ .text
+foo:
+ nop # \ No newline at end of file
diff --git a/test/MC/Disassembler/Mips/micromips-dsp/valid.txt b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
index f3d6f3dc0367..a373bcd9d6a3 100644
--- a/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
+++ b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
@@ -94,7 +94,7 @@
0x00 0x01 0x70 0x7c # CHECK: mtlo $1, $ac1
0x00 0x22 0xf1 0x3c # CHECK: raddu.w.qb $1, $2
0x00 0x20 0x86 0x7c # CHECK: rddsp $1, 2
-0x02 0x00 0x08 0x3d # CHECK: repl.ph $1, 512
+0x00 0x02 0x08 0x3d # CHECK: repl.ph $1, 2
0x00 0x30 0x05 0xfc # CHECK: repl.qb $1, 128
0x00 0x22 0x03 0x3c # CHECK: replv.ph $1, $2
0x00 0x22 0x13 0x3c # CHECK: replv.qb $1, $2
diff --git a/test/MC/ELF/ARM/clang-section.s b/test/MC/ELF/ARM/clang-section.s
new file mode 100644
index 000000000000..0b0d27c4ceb1
--- /dev/null
+++ b/test/MC/ELF/ARM/clang-section.s
@@ -0,0 +1,399 @@
+// RUN: llvm-mc -filetype=obj -triple arm-eabi %s -o - | llvm-readobj -s -t | FileCheck %s
+// Test that global variables and functions are assigned to the correct section.
+ .text
+ .syntax unified
+ .eabi_attribute 67, "2.09" @ Tag_conformance
+ .eabi_attribute 6, 1 @ Tag_CPU_arch
+ .eabi_attribute 8, 1 @ Tag_ARM_ISA_use
+ .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use
+ .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal
+ .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions
+ .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model
+ .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access
+ .eabi_attribute 24, 1 @ Tag_ABI_align_needed
+ .eabi_attribute 25, 1 @ Tag_ABI_align_preserved
+ .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format
+ .eabi_attribute 18, 4 @ Tag_ABI_PCS_wchar_t
+ .eabi_attribute 26, 2 @ Tag_ABI_enum_size
+ .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use
+ .section my_text.1,"ax",%progbits
+ .globl foo
+ .p2align 2
+ .type foo,%function
+ .code 32 @ @foo
+foo:
+ .fnstart
+@ BB#0: @ %entry
+ ldr r0, .LCPI0_0
+ ldr r0, [r0]
+ mov pc, lr
+ .p2align 2
+@ BB#1:
+.LCPI0_0:
+ .long b
+.Lfunc_end0:
+ .size foo, .Lfunc_end0-foo
+ .cantunwind
+ .fnend
+
+ .section my_text.2,"ax",%progbits
+ .globl goo
+ .p2align 2
+ .type goo,%function
+ .code 32 @ @goo
+goo:
+ .fnstart
+@ BB#0: @ %entry
+ .save {r11, lr}
+ push {r11, lr}
+ ldr r0, .LCPI1_0
+ ldr r1, .LCPI1_1
+ bl zoo
+ pop {r11, lr}
+ mov pc, lr
+ .p2align 2
+@ BB#1:
+.LCPI1_0:
+ .long _ZL1g
+.LCPI1_1:
+ .long _ZZ3gooE7lstat_h
+.Lfunc_end1:
+ .size goo, .Lfunc_end1-goo
+ .cantunwind
+ .fnend
+
+ .text
+ .globl hoo
+ .p2align 2
+ .type hoo,%function
+ .code 32 @ @hoo
+hoo:
+ .fnstart
+@ BB#0: @ %entry
+ ldr r0, .LCPI2_0
+ ldr r0, [r0]
+ mov pc, lr
+ .p2align 2
+@ BB#1:
+.LCPI2_0:
+ .long b
+.Lfunc_end2:
+ .size hoo, .Lfunc_end2-hoo
+ .cantunwind
+ .fnend
+
+ .type a,%object @ @a
+ .section my_bss.1,"aw",%nobits
+ .globl a
+ .p2align 2
+a:
+ .long 0 @ 0x0
+ .size a, 4
+
+ .type b,%object @ @b
+ .section my_data.1,"aw",%progbits
+ .globl b
+ .p2align 2
+b:
+ .long 1 @ 0x1
+ .size b, 4
+
+ .type c,%object @ @c
+ .section my_bss.1,"aw",%nobits
+ .globl c
+ .p2align 2
+c:
+ .zero 16
+ .size c, 16
+
+ .type d,%object @ @d
+ .globl d
+ .p2align 1
+d:
+ .zero 10
+ .size d, 10
+
+ .type e,%object @ @e
+ .section my_data.1,"aw",%progbits
+ .globl e
+ .p2align 1
+e:
+ .short 0 @ 0x0
+ .short 0 @ 0x0
+ .short 1 @ 0x1
+ .short 0 @ 0x0
+ .short 0 @ 0x0
+ .short 0 @ 0x0
+ .size e, 12
+
+ .type f,%object @ @f
+ .section my_rodata.1,"a",%progbits
+ .globl f
+ .p2align 2
+f:
+ .long 2 @ 0x2
+ .size f, 4
+
+ .type h,%object @ @h
+ .bss
+ .globl h
+ .p2align 2
+h:
+ .long 0 @ 0x0
+ .size h, 4
+
+ .type i,%object @ @i
+ .section my_bss.2,"aw",%nobits
+ .globl i
+ .p2align 2
+i:
+ .long 0 @ 0x0
+ .size i, 4
+
+ .type j,%object @ @j
+ .section my_rodata.1,"a",%progbits
+ .globl j
+ .p2align 2
+j:
+ .long 4 @ 0x4
+ .size j, 4
+
+ .type k,%object @ @k
+ .section my_bss.2,"aw",%nobits
+ .globl k
+ .p2align 2
+k:
+ .long 0 @ 0x0
+ .size k, 4
+
+ .type _ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h
+ .p2align 2
+_ZZ3gooE7lstat_h:
+ .long 0 @ 0x0
+ .size _ZZ3gooE7lstat_h, 4
+
+ .type _ZL1g,%object @ @_ZL1g
+ .section my_bss.1,"aw",%nobits
+ .p2align 2
+_ZL1g:
+ .zero 8
+ .size _ZL1g, 8
+
+ .type l,%object @ @l
+ .section my_data.2,"aw",%progbits
+ .globl l
+ .p2align 2
+l:
+ .long 5 @ 0x5
+ .size l, 4
+
+ .type m,%object @ @m
+ .section my_rodata.2,"a",%progbits
+ .globl m
+ .p2align 2
+m:
+ .long 6 @ 0x6
+ .size m, 4
+
+ .type n,%object @ @n
+ .bss
+ .globl n
+ .p2align 2
+n:
+ .long 0 @ 0x0
+ .size n, 4
+
+ .type o,%object @ @o
+ .data
+ .globl o
+ .p2align 2
+o:
+ .long 6 @ 0x6
+ .size o, 4
+
+ .type p,%object @ @p
+ .section .rodata,"a",%progbits
+ .globl p
+ .p2align 2
+p:
+ .long 7 @ 0x7
+ .size p, 4
+
+
+ .ident "clang version 5.0.0 (http://llvm.org/git/clang.git 254242a3ad440307fb451093a429c71ea9a8c888) (http://llvm.org/git/llvm.git 3c8daefbe3d1672ac1dae775b211f881f0063038)"
+ .section ".note.GNU-stack","",%progbits
+ .eabi_attribute 30, 1 @ Tag_ABI_optimization_goals
+
+//CHECK: Section {
+//CHECK: Name: .text
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x6)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_EXECINSTR (0x4)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_text.1
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x6)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_EXECINSTR (0x4)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_text.2
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x6)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_EXECINSTR (0x4)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_bss.1
+//CHECK: Type: SHT_NOBITS (0x8)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_data.1
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_rodata.1
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x2)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: .bss
+//CHECK: Type: SHT_NOBITS (0x8)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_bss.2
+//CHECK: Type: SHT_NOBITS (0x8)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_data.2
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: my_rodata.2
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x2)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: .data
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x3)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: SHF_WRITE (0x1)
+//CHECK: ]
+//CHECK: }
+//CHECK: Section {
+//CHECK: Name: .rodata
+//CHECK: Type: SHT_PROGBITS (0x1)
+//CHECK: Flags [ (0x2)
+//CHECK: SHF_ALLOC (0x2)
+//CHECK: ]
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: _ZL1g
+//CHECK: Section: my_bss.1 (0xE)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: _ZZ3gooE7lstat_h
+//CHECK: Section: my_bss.2 (0x12)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: a
+//CHECK: Section: my_bss.1 (0xE)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: b
+//CHECK: Section: my_data.1 (0xF)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: c
+//CHECK: Section: my_bss.1 (0xE)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: d
+//CHECK: Section: my_bss.1 (0xE)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: e
+//CHECK: Section: my_data.1 (0xF)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: f
+//CHECK: Section: my_rodata.1 (0x10)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: foo
+//CHECK: Section: my_text.1 (0x4)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: goo
+//CHECK: Section: my_text.2 (0x8)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: h
+//CHECK: Section: .bss (0x11)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: hoo
+//CHECK: Section: .text (0x2)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: i
+//CHECK: Section: my_bss.2 (0x12)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: j
+//CHECK: Section: my_rodata.1 (0x10)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: k
+//CHECK: Section: my_bss.2 (0x12)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: l
+//CHECK: Section: my_data.2 (0x13)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: m
+//CHECK: Section: my_rodata.2 (0x14)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: n
+//CHECK: Section: .bss (0x11)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: o
+//CHECK: Section: .data (0x15)
+//CHECK: }
+//CHECK: Symbol {
+//CHECK: Name: p
+//CHECK: Section: .rodata (0x16)
+//CHECK: }
diff --git a/test/MC/MachO/alias.s b/test/MC/MachO/alias.s
new file mode 100644
index 000000000000..aec04c63b68f
--- /dev/null
+++ b/test/MC/MachO/alias.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple x86_64-apple-macosx10.12.0 %s -filetype=obj | llvm-readobj -r | FileCheck %s
+
+l_a:
+l_b = l_a
+l_c = l_b
+ .long l_c
+
+// CHECK: Relocations [
+// CHECK-NEXT: Section __text {
+// CHECK-NEXT: 0x0 0 2 1 X86_64_RELOC_UNSIGNED 0 l_c
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
index 5369622d8cfe..380e7e75cc75 100644
--- a/test/MC/MachO/variable-exprs.s
+++ b/test/MC/MachO/variable-exprs.s
@@ -110,8 +110,8 @@ Lt0_x = Lt0_a - Lt0_b
// CHECK-I386: 0x2C 0 2 0 GENERIC_RELOC_VANILLA 0 __data
// CHECK-I386: 0x28 0 2 0 GENERIC_RELOC_VANILLA 0 __data
// CHECK-I386: 0x24 0 2 1 GENERIC_RELOC_VANILLA 0 d3
-// CHECK-I386: 0x20 0 2 1 GENERIC_RELOC_VANILLA 0 d2
-// CHECK-I386: 0x1C 0 2 1 GENERIC_RELOC_VANILLA 0 d
+// CHECK-I386: 0x20 0 2 1 GENERIC_RELOC_VANILLA 0 d{{$}}
+// CHECK-I386: 0x1C 0 2 1 GENERIC_RELOC_VANILLA 0 d{{$}}
// CHECK-I386: 0x18 0 2 n/a GENERIC_RELOC_VANILLA 1 0x5
// CHECK-I386: 0x14 0 2 0 GENERIC_RELOC_VANILLA 0 __data
// CHECK-I386: 0x10 0 2 0 GENERIC_RELOC_VANILLA 0 __data
@@ -319,8 +319,8 @@ Lt0_x = Lt0_a - Lt0_b
// CHECK-X86_64: 0x2C 0 2 1 X86_64_RELOC_UNSIGNED 0 g
// CHECK-X86_64: 0x28 0 2 1 X86_64_RELOC_UNSIGNED 0 f
// CHECK-X86_64: 0x24 0 2 1 X86_64_RELOC_UNSIGNED 0 d3
-// CHECK-X86_64: 0x20 0 2 1 X86_64_RELOC_UNSIGNED 0 d2
-// CHECK-X86_64: 0x1C 0 2 1 X86_64_RELOC_UNSIGNED 0 d
+// CHECK-X86_64: 0x20 0 2 1 X86_64_RELOC_UNSIGNED 0 d{{$}}
+// CHECK-X86_64: 0x1C 0 2 1 X86_64_RELOC_UNSIGNED 0 d{{$}}
// CHECK-X86_64: 0x18 0 2 1 X86_64_RELOC_UNSIGNED 0 a
// CHECK-X86_64: 0x14 0 2 1 X86_64_RELOC_UNSIGNED 0 e
// CHECK-X86_64: 0x10 0 2 1 X86_64_RELOC_UNSIGNED 0 b
diff --git a/test/MC/Mips/dsp/invalid.s b/test/MC/Mips/dsp/invalid.s
index 1d50b829985c..f58a44560c54 100644
--- a/test/MC/Mips/dsp/invalid.s
+++ b/test/MC/Mips/dsp/invalid.s
@@ -31,8 +31,8 @@
shilo $ac1, -64 # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
repl.qb $2, -1 # CHECK: :[[@LINE]]:15: error: expected 8-bit unsigned immediate
repl.qb $2, 256 # CHECK: :[[@LINE]]:15: error: expected 8-bit unsigned immediate
- repl.ph $2, -1 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
- repl.ph $2, 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ repl.ph $2, -513 # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
+ repl.ph $2, 512 # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
rddsp $2, -1 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
rddsp $2, 1024 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
wrdsp $5, -1 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dsp/invalid.s b/test/MC/Mips/micromips-dsp/invalid.s
index 8e6fedbf0b94..05fc77440d3e 100644
--- a/test/MC/Mips/micromips-dsp/invalid.s
+++ b/test/MC/Mips/micromips-dsp/invalid.s
@@ -1,6 +1,8 @@
# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dsp 2>%t1
# RUN: FileCheck %s < %t1
+ repl.ph $2, -513 # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
+ repl.ph $2, 512 # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
shll.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
shll.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
shll_s.ph $3, $4, 16 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dsp/valid.s b/test/MC/Mips/micromips-dsp/valid.s
index d1f5d0f3ae8d..ed279f3eb539 100644
--- a/test/MC/Mips/micromips-dsp/valid.s
+++ b/test/MC/Mips/micromips-dsp/valid.s
@@ -95,7 +95,7 @@
mtlo $1, $ac1 # CHECK: mtlo $1, $ac1 # encoding: [0x00,0x01,0x70,0x7c]
raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x00,0x22,0xf1,0x3c]
rddsp $1, 2 # CHECK: rddsp $1, 2 # encoding: [0x00,0x20,0x86,0x7c]
- repl.ph $1, 512 # CHECK: repl.ph $1, 512 # encoding: [0x02,0x00,0x08,0x3d]
+ repl.ph $1, 2 # CHECK: repl.ph $1, 2 # encoding: [0x00,0x02,0x08,0x3d]
repl.qb $1, 128 # CHECK: repl.qb $1, 128 # encoding: [0x00,0x30,0x05,0xfc]
replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x00,0x22,0x03,0x3c]
replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x00,0x22,0x13,0x3c]
diff --git a/test/MC/WebAssembly/reloc-code.ll b/test/MC/WebAssembly/reloc-code.ll
index 5c794400fa09..5fcd9b403811 100644
--- a/test/MC/WebAssembly/reloc-code.ll
+++ b/test/MC/WebAssembly/reloc-code.ll
@@ -36,16 +36,6 @@ entry:
; CHECK-NEXT: Addend: 0
; CHECK-NEXT: }
; CHECK-NEXT: Relocation {
-; CHECK-NEXT: Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
-; CHECK-NEXT: Offset: 0x2D
-; CHECK-NEXT: Index: 0x0
-; CHECK-NEXT: }
-; CHECK-NEXT: Relocation {
-; CHECK-NEXT: Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
-; CHECK-NEXT: Offset: 0x34
-; CHECK-NEXT: Index: 0x1
-; CHECK-NEXT: }
-; CHECK-NEXT: Relocation {
; CHECK-NEXT: Type: R_WEBASSEMBLY_TYPE_INDEX_LEB (6)
; CHECK-NEXT: Offset: 0x1A
; CHECK-NEXT: Index: 0x1
@@ -55,5 +45,15 @@ entry:
; CHECK-NEXT: Offset: 0x24
; CHECK-NEXT: Index: 0x0
; CHECK-NEXT: }
+; CHECK-NEXT: Relocation {
+; CHECK-NEXT: Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT: Offset: 0x2D
+; CHECK-NEXT: Index: 0x0
+; CHECK-NEXT: }
+; CHECK-NEXT: Relocation {
+; CHECK-NEXT: Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT: Offset: 0x34
+; CHECK-NEXT: Index: 0x1
+; CHECK-NEXT: }
; CHECK-NEXT: }
; CHECK-NEXT: ]
diff --git a/test/Object/AMDGPU/elf-definitions.yaml b/test/Object/AMDGPU/elf-definitions.yaml
index 819786aa1902..07fe8c62dc47 100644
--- a/test/Object/AMDGPU/elf-definitions.yaml
+++ b/test/Object/AMDGPU/elf-definitions.yaml
@@ -3,15 +3,12 @@
# CHECK: Format: ELF64-amdgpu-hsacobj
# CHECK: Arch: amdgcn
-# CHECK: Machine: EM_AMDGPU (0xE0)
-# CHECK: Sections [
-# CHECK: Section {
-# CHECK: Name: .shf_amdgpu
-# CHECK: Flags [ (0xF00000)
-# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
-# CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
-# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000)
-# CHECK: SHF_AMDGPU_HSA_READONLY (0x200000)
+# CHECK: ElfHeader {
+# CHECK: Ident {
+# CHECK: OS/ABI: AMDGPU_HSA (0x40)
+# CHECK: ABIVersion: 0
+# CHECK: }
+# CHECK: Machine: EM_AMDGPU (0xE0)
# CHECK: }
--- !ELF
@@ -21,10 +18,4 @@ FileHeader:
Type: ET_REL
Machine: EM_AMDGPU
OSABI: ELFOSABI_AMDGPU_HSA
-
-Sections:
- - Name: .shf_amdgpu
- Type: SHT_PROGBITS
- Flags: [ SHF_AMDGPU_HSA_GLOBAL, SHF_AMDGPU_HSA_READONLY,
- SHF_AMDGPU_HSA_CODE, SHF_AMDGPU_HSA_AGENT]
...
diff --git a/test/Object/objc-imageinfo-coff.ll b/test/Object/objc-imageinfo-coff.ll
new file mode 100644
index 000000000000..017d5ac003c9
--- /dev/null
+++ b/test/Object/objc-imageinfo-coff.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !".objc_imageinfo$B"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section .objc_imageinfo$B,"dr"
+; CHECK: OBJC_IMAGE_INFO:
+; CHECK: .long 0
+; CHECK: .long 2
+
diff --git a/test/Object/objc-imageinfo-elf.ll b/test/Object/objc-imageinfo-elf.ll
new file mode 100644
index 000000000000..f7484fa39be1
--- /dev/null
+++ b/test/Object/objc-imageinfo-elf.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-unknown-linux-gnu -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"objc_imageinfo"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section objc_imageinfo
+; CHECK: OBJC_IMAGE_INFO:
+; CHECK: .long 0
+; CHECK: .long 2
+
diff --git a/test/Object/objc-imageinfo-macho.ll b/test/Object/objc-imageinfo-macho.ll
new file mode 100644
index 000000000000..97c36699e5df
--- /dev/null
+++ b/test/Object/objc-imageinfo-macho.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-apple-ios -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA,__objc_imageinfo,regular,no_dead_strip"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK: L_OBJC_IMAGE_INFO:
+; CHECK: .long 0
+; CHECK: .long 2
+
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
new file mode 100644
index 000000000000..328e8cc2907f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -0,0 +1,337 @@
+; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
+
+define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp2(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+ ret i32 %call
+}
+
+define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp3(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+ ret i32 %call
+}
+
+define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp4(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+ ret i32 %call
+}
+
+define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp5(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+ ret i32 %call
+}
+
+define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp6(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+ ret i32 %call
+}
+
+define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp7(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+ ret i32 %call
+}
+
+define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp8(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+ ret i32 %call
+}
+
+define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp9(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+ ret i32 %call
+}
+
+define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp10(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+ ret i32 %call
+}
+
+define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp11(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+ ret i32 %call
+}
+
+define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp12(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+ ret i32 %call
+}
+
+define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp13(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+ ret i32 %call
+}
+
+define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp14(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+ ret i32 %call
+}
+
+define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp15(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+ ret i32 %call
+}
+
+define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp16(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT: ret i32 [[CALL]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+ ret i32 %call
+}
+
+define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq2(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq3(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq4(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq5(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq6(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq7(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq8(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq9(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq10(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq11(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq12(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq13(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq14(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq15(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
+; ALL-LABEL: @cmp_eq16(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
diff --git a/test/Transforms/ConstProp/sse.ll b/test/Transforms/ConstProp/sse.ll
index cc37c96c1ff1..ad0a62e42062 100644
--- a/test/Transforms/ConstProp/sse.ll
+++ b/test/Transforms/ConstProp/sse.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -constprop -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
define i1 @test_sse_cvts_exact() nounwind readnone {
; CHECK-LABEL: @test_sse_cvts_exact(
diff --git a/test/Transforms/DCE/calls-errno.ll b/test/Transforms/DCE/calls-errno.ll
index 22ea04aa8f36..415caae0fe60 100644
--- a/test/Transforms/DCE/calls-errno.ll
+++ b/test/Transforms/DCE/calls-errno.ll
@@ -72,6 +72,10 @@ entry:
; CHECK-NEXT: %cos2 = call double @cos(double 0x7FF0000000000000)
%cos2 = call double @cos(double 0x7FF0000000000000)
+; cos(0) nobuiltin may have side effects
+; CHECK-NEXT: %cos3 = call double @cos(double 0.000000e+00)
+ %cos3 = call double @cos(double 0.000000e+00) nobuiltin
+
; pow(0, 1) is 0
%pow1 = call double @pow(double 0x7FF0000000000000, double 1.000000e+00)
diff --git a/test/Transforms/GVNSink/sink-common-code.ll b/test/Transforms/GVNSink/sink-common-code.ll
index d9e757cd10fc..02b1eb7fe259 100644
--- a/test/Transforms/GVNSink/sink-common-code.ll
+++ b/test/Transforms/GVNSink/sink-common-code.ll
@@ -54,33 +54,36 @@ if.end:
declare i32 @foo(i32, i32) nounwind readnone
-define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
-entry:
- br i1 %flag, label %if.then, label %if.else
-
-if.then:
- %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
- %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
- br label %if.end
-
-if.else:
- %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
- %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
- br label %if.end
-
-if.end:
- %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
- %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
- %ret = add i32 %xx, %yy
- ret i32 %ret
-}
-
-; CHECK-LABEL: test3
-; CHECK: select
-; CHECK: call
-; CHECK: call
-; CHECK: add
-; CHECK-NOT: br
+; FIXME: The test fails when the original order of the
+; candidates with the same cost is preserved.
+;
+;define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+;entry:
+; br i1 %flag, label %if.then, label %if.else
+;
+;if.then:
+; %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+; %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+; br label %if.end
+;
+;if.else:
+; %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+; %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+; br label %if.end
+;
+;if.end:
+; %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+; %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+; %ret = add i32 %xx, %yy
+; ret i32 %ret
+;}
+;
+; -CHECK-LABEL: test3
+; -CHECK: select
+; -CHECK: call
+; -CHECK: call
+; -CHECK: add
+; -CHECK-NOT: br
define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
entry:
diff --git a/test/Transforms/IRCE/correct-loop-info.ll b/test/Transforms/IRCE/correct-loop-info.ll
new file mode 100644
index 000000000000..3c26b47f154f
--- /dev/null
+++ b/test/Transforms/IRCE/correct-loop-info.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -irce < %s -S | FileCheck %s
+
+; REQUIRES: asserts
+
+; IRCE creates the pre and post loops and then canonicalizes them into
+; LCSSA and loop-simplify form. Make sure that the update to the LoopInfo does not
+; incorrectly change the header while canonicalizing these pre/post loops. We
+; were incorrectly updating LI when the split loop was a subloop, as in the case below.
+source_filename = "correct-loop-info.ll"
+
+define void @baz() personality i32* ()* @ham {
+; CHECK-LABEL: @baz(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[OUTERHEADER:%.*]]
+; CHECK: outerheader:
+; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 undef, 84
+; CHECK-NEXT: br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK: innerheader.preloop.preheader:
+; CHECK-NEXT: br label [[INNERHEADER_PRELOOP:%.*]]
+; CHECK: mainloop:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], -1
+; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK: innerheader.preheader:
+; CHECK-NEXT: br label [[INNERHEADER:%.*]]
+; CHECK: innerheader:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ]
+; CHECK-NEXT: invoke void @pluto()
+; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp
+; CHECK: bb5:
+; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 0
+; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP6]], -1
+; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84
+; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]]
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[POSTLOOP:%.*]]
+; CHECK: outer_exiting.loopexit:
+; CHECK-NEXT: [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: br label [[OUTER_EXITING:%.*]]
+; CHECK: outer_exiting.loopexit.split-lp.loopexit:
+; CHECK-NEXT: [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp
+; CHECK: outer_exiting.loopexit.split-lp.loopexit.split-lp:
+; CHECK-NEXT: %lpad.loopexit.split-lp3 = landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp
+; CHECK: outer_exiting.loopexit.split-lp:
+; CHECK-NEXT: br label [[OUTER_EXITING]]
+; CHECK: outer_exiting:
+; CHECK-NEXT: switch i32 undef, label [[EXIT2:%.*]] [
+; CHECK-NEXT: i32 142, label [[BB14:%.*]]
+; CHECK-NEXT: i32 448, label [[EXIT:%.*]]
+; CHECK-NEXT: ]
+; CHECK: exit3.loopexit:
+; CHECK-NEXT: br label [[EXIT3:%.*]]
+; CHECK: exit3.loopexit4:
+; CHECK-NEXT: br label [[EXIT3]]
+; CHECK: exit3.loopexit5:
+; CHECK-NEXT: br label [[EXIT3]]
+; CHECK: exit3:
+; CHECK-NEXT: ret void
+; CHECK: bb13.loopexit:
+; CHECK-NEXT: br label [[BB13]]
+; CHECK: bb13:
+; CHECK-NEXT: unreachable
+; CHECK: bb14:
+; CHECK-NEXT: br label [[OUTERHEADER]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: bb16:
+; CHECK-NEXT: ret void
+; CHECK: exit2:
+; CHECK-NEXT: ret void
+; CHECK: innerheader.preloop:
+; CHECK-NEXT: [[TMP4_PRELOOP:%.*]] = phi i32 [ [[TMP6_PRELOOP:%.*]], [[BB8_PRELOOP:%.*]] ], [ undef, [[INNERHEADER_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT: invoke void @pluto()
+; CHECK-NEXT: to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]]
+; CHECK: bb5.preloop:
+; CHECK-NEXT: [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1
+; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 0
+; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]]
+; CHECK: bb8.preloop:
+; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1
+; CHECK-NEXT: br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84
+; CHECK-NEXT: br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]]
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[MAINLOOP]]
+; CHECK: postloop:
+; CHECK-NEXT: br label [[INNERHEADER_POSTLOOP:%.*]]
+; CHECK: innerheader.postloop:
+; CHECK-NEXT: [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT: invoke void @pluto()
+; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit
+; CHECK: bb5.postloop:
+; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1
+; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 0
+; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]]
+; CHECK: bb8.postloop:
+; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84
+; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5
+;
+bb:
+ br label %outerheader
+
+outerheader: ; preds = %bb14, %bb
+ %tmp = icmp slt i32 undef, 84
+ br i1 %tmp, label %bb2, label %bb16
+
+bb2: ; preds = %outerheader
+ br label %innerheader
+
+innerheader: ; preds = %bb8, %bb2
+ %tmp4 = phi i32 [ %tmp6, %bb8 ], [ undef, %bb2 ]
+ invoke void @pluto()
+ to label %bb5 unwind label %outer_exiting
+
+bb5: ; preds = %innerheader
+ %tmp6 = add i32 %tmp4, 1
+ %tmp7 = icmp ult i32 %tmp6, 0
+ br i1 %tmp7, label %bb8, label %exit3
+
+bb8: ; preds = %bb5
+ %tmp9 = icmp slt i32 %tmp6, 84
+ br i1 %tmp9, label %innerheader, label %bb13
+
+outer_exiting: ; preds = %innerheader
+ %tmp11 = landingpad { i8*, i32 }
+ cleanup
+ switch i32 undef, label %exit2 [
+ i32 142, label %bb14
+ i32 448, label %exit
+ ]
+
+exit3: ; preds = %bb5
+ ret void
+
+bb13: ; preds = %bb8
+ unreachable
+
+bb14: ; preds = %outer_exiting
+ br label %outerheader
+
+exit: ; preds = %outer_exiting
+ ret void
+
+bb16: ; preds = %outerheader
+ ret void
+
+exit2: ; preds = %outer_exiting
+ ret void
+}
+
+declare i32* @ham()
+
+declare void @pluto()
+
+!0 = distinct !{!0, !1, !2, !3, !4}
+!1 = !{!"llvm.loop.unroll.disable"}
+!2 = !{!"llvm.loop.vectorize.enable", i1 false}
+!3 = !{!"llvm.loop.licm_versioning.disable"}
+!4 = !{!"llvm.loop.distribute.enable", i1 false}
+!5 = !{}
+!6 = distinct !{!6, !1, !2, !3, !4}
diff --git a/test/Transforms/IndVarSimplify/lftr_disabled.ll b/test/Transforms/IndVarSimplify/lftr_disabled.ll
new file mode 100644
index 000000000000..c647d123dd75
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr_disabled.ll
@@ -0,0 +1,28 @@
+; The computation of i*i must not be eliminated here, because LFTR is
+; disabled (-disable-lftr), so the mul has to survive.
+; RUN: opt < %s -indvars -dce -disable-lftr -S | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+
+@A = external global i32 ; <i32*> [#uses=1]
+
+define i32 @quadratic_setlt() {
+; CHECK-LABEL: @quadratic_setlt(
+; CHECK: mul
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %i = phi i32 [ 7, %entry ], [ %i.next, %loop ] ; <i32> [#uses=5]
+ %i.next = add i32 %i, 1 ; <i32> [#uses=1]
+ store i32 %i, i32* @A
+ %i2 = mul i32 %i, %i ; <i32> [#uses=1]
+ %c = icmp slt i32 %i2, 1000 ; <i1> [#uses=1]
+ br i1 %c, label %loop, label %loopexit
+
+loopexit: ; preds = %loop
+ ret i32 %i
+}
+
diff --git a/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll b/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
new file mode 100644
index 000000000000..1b3240620571
--- /dev/null
+++ b/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -infer-address-spaces %s | FileCheck %s
+
+%struct.S = type { [5 x i32] }
+
+$g1 = comdat any
+
+@g1 = linkonce_odr addrspace(3) global %struct.S zeroinitializer, comdat, align 4
+
+; CHECK-LABEL: @foo(
+; CHECK: %x0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2
+; CHECK: %idxprom.i = zext i32 %x0 to i64
+; CHECK: %arrayidx.i = getelementptr %struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 %idxprom.i
+; CHECK: tail call void @f1(i32* %arrayidx.i, i32 undef) #0
+; CHECK: %x1 = load i32, i32* getelementptr (%struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 0), align 4
+; CHECK: %L.sroa.0.0.insert.ext.i = zext i32 %x1 to i64
+; CHECK: tail call void @f2(i64* null, i64 %L.sroa.0.0.insert.ext.i) #0
+; CHECK: ret void
+define void @foo() local_unnamed_addr #0 {
+entry:
+ %x0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2
+ %idxprom.i = zext i32 %x0 to i64
+ %arrayidx.i = getelementptr %struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 %idxprom.i
+ tail call void @f1(i32* %arrayidx.i, i32 undef) #0
+ %x1 = load i32, i32* getelementptr (%struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 0), align 4
+ %L.sroa.0.0.insert.ext.i = zext i32 %x1 to i64
+ tail call void @f2(i64* null, i64 %L.sroa.0.0.insert.ext.i) #0
+ ret void
+}
+
+declare void @f1(i32*, i32) local_unnamed_addr #0
+declare void @f2(i64*, i64) local_unnamed_addr #0
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index b98644cd2dd4..f34ed0841132 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -91,3 +91,27 @@ define i32 @test() {
ret i32 %e
; CHECK: }
}
+
+; Inliner shouldn't delete calls it can't inline, even if they're trivially dead
+; CHECK-LABEL: @outer4(
+define void @outer4(void ()* %inner4) {
+entry:
+; CHECK: call void %inner4()
+ call void %inner4() nounwind readnone
+ ret void
+}
+
+declare void @inner5_inner()
+
+define void @inner5(void ()* %x) {
+ call void %x() nounwind readnone
+ ret void
+}
+
+; Inliner shouldn't delete calls it can't inline, even if they're trivially dead and temporarily indirect
+; CHECK-LABEL: @outer5(
+define void @outer5() {
+; CHECK: call void @inner5_inner(
+ call void @inner5(void ()* @inner5_inner)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/constant-fold-libfunc.ll b/test/Transforms/InstCombine/constant-fold-libfunc.ll
new file mode 100644
index 000000000000..c969b65a4e74
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-libfunc.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare double @acos(double)
+
+; Check that functions without any function attributes are simplified.
+
+define double @test_simplify_acos() {
+; CHECK-LABEL: @test_simplify_acos
+ %pi = call double @acos(double -1.000000e+00)
+; CHECK-NOT: call double @acos
+; CHECK: ret double 0x400921FB54442D18
+ ret double %pi
+}
+
+define double @test_acos_nobuiltin() {
+; CHECK-LABEL: @test_acos_nobuiltin
+ %pi = call double @acos(double -1.000000e+00) nobuiltin
+; CHECK: call double @acos(double -1.000000e+00)
+ ret double %pi
+}
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 29f774c5f62b..fb25c2342798 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -260,3 +260,26 @@ bb2:
%ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
ret <4 x float> %ins2
}
+
+; Don't insert extractelements from the wider vector before the def of the index operand.
+
+define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
+; CHECK-LABEL: @extractelt_insertion(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
+; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[RET]]
+;
+entry:
+ %a = extractelement <2 x i32> %x, i32 1
+ %b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
+ %c = add i32 %y, 3
+ %d = extractelement <2 x i32> %x, i32 %c
+ %e = icmp eq i32 %d, 0
+ %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 78c98955353e..1b1ed606868f 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -21,6 +21,7 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
declare double @llvm.cos.f64(double %Val) nounwind readonly
declare double @llvm.sin.f64(double %Val) nounwind readonly
declare double @llvm.floor.f64(double %Val) nounwind readonly
@@ -282,6 +283,16 @@ define i32 @cttz(i32 %a) {
ret i32 %count
}
+define <2 x i32> @cttz_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_vec(
+; CHECK-NEXT: ret <2 x i32> <i32 3, i32 3>
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %and = and <2 x i32> %or, <i32 -8, i32 -8>
+ %count = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %and, i1 true) nounwind readnone
+ ret <2 x i32> %count
+}
+
define i1 @cttz_knownbits(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -292,6 +303,16 @@ define i1 @cttz_knownbits(i32 %arg) {
ret i1 %res
}
+define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i32> %cnt, <i32 4, i32 4>
+ ret <2 x i1> %res
+}
+
define i1 @cttz_knownbits2(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
@@ -305,6 +326,19 @@ define i1 @cttz_knownbits2(i32 %arg) {
ret i1 %res
}
+define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits2_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i32> %cnt, <i32 2, i32 2>
+ ret <2 x i1> %res
+}
+
; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @cttz_knownbits3(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits3(
@@ -319,6 +353,20 @@ define i1 @cttz_knownbits3(i32 %arg) {
ret i1 %res
}
+; TODO: The icmp is unnecessary given the known bits of the input.
+define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits3_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
define i8 @ctlz(i8 %a) {
; CHECK-LABEL: @ctlz(
; CHECK-NEXT: ret i8 2
@@ -329,6 +377,16 @@ define i8 @ctlz(i8 %a) {
ret i8 %count
}
+define <2 x i8> @ctlz_vec(<2 x i8> %a) {
+; CHECK-LABEL: @ctlz_vec(
+; CHECK-NEXT: ret <2 x i8> <i8 2, i8 2>
+;
+ %or = or <2 x i8> %a, <i8 32, i8 32>
+ %and = and <2 x i8> %or, <i8 63, i8 63>
+ %count = tail call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %and, i1 true) nounwind readnone
+ ret <2 x i8> %count
+}
+
define i1 @ctlz_knownbits(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -339,6 +397,16 @@ define i1 @ctlz_knownbits(i8 %arg) {
ret i1 %res
}
+define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i8> %cnt, <i8 4, i8 4>
+ ret <2 x i1> %res
+}
+
define i1 @ctlz_knownbits2(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
@@ -352,6 +420,19 @@ define i1 @ctlz_knownbits2(i8 %arg) {
ret i1 %res
}
+define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits2_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 2, i8 2>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i8> %cnt, <i8 2, i8 2>
+ ret <2 x i1> %res
+}
+
; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @ctlz_knownbits3(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits3(
@@ -366,6 +447,20 @@ define i1 @ctlz_knownbits3(i8 %arg) {
ret i1 %res
}
+; TODO: The icmp is unnecessary given the known bits of the input.
+define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits3_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i8> %cnt, <i8 3, i8 3>
+ ret <2 x i1> %res
+}
+
define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
%lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
%lz.cmp = icmp eq i32 %lz, 32
@@ -406,7 +501,7 @@ define <2 x i1> @cttz_cmp_vec(<2 x i32> %a) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
- %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
+ %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
%cmp = icmp ne <2 x i32> %x, <i32 32, i32 32>
ret <2 x i1> %cmp
}
@@ -434,6 +529,14 @@ define i32 @ctlz_undef(i32 %Value) {
ret i32 %ctlz
}
+define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) {
+; CHECK-LABEL: @ctlz_undef_vec(
+; CHECK-NEXT: ret <2 x i32> undef
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true)
+ ret <2 x i32> %ctlz
+}
+
define i32 @ctlz_make_undef(i32 %a) {
%or = or i32 %a, 8
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
@@ -444,13 +547,31 @@ define i32 @ctlz_make_undef(i32 %a) {
; CHECK-NEXT: ret i32 %ctlz
}
+define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_make_undef_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %ctlz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %or, i1 false)
+ ret <2 x i32> %ctlz
+}
+
define i32 @cttz_undef(i32 %Value) nounwind {
; CHECK-LABEL: @cttz_undef(
; CHECK-NEXT: ret i32 undef
;
%cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
ret i32 %cttz
+}
+define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_undef_vec(
+; CHECK-NEXT: ret <2 x i32> undef
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true)
+ ret <2 x i32> %cttz
}
define i32 @cttz_make_undef(i32 %a) {
@@ -463,6 +584,17 @@ define i32 @cttz_make_undef(i32 %a) {
; CHECK-NEXT: ret i32 %cttz
}
+define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_make_undef_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT: [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %cttz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 false)
+ ret <2 x i32> %cttz
+}
+
define i32 @ctlz_select(i32 %Value) nounwind {
; CHECK-LABEL: @ctlz_select(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 %Value, i1 false)
@@ -472,7 +604,17 @@ define i32 @ctlz_select(i32 %Value) nounwind {
%ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
%s = select i1 %tobool, i32 %ctlz, i32 32
ret i32 %s
+}
+define <2 x i32> @ctlz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @ctlz_select_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %Value, i1 true)
+ %s = select <2 x i1> %tobool, <2 x i32> %ctlz, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %s
}
define i32 @cttz_select(i32 %Value) nounwind {
@@ -484,7 +626,17 @@ define i32 @cttz_select(i32 %Value) nounwind {
%cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
%s = select i1 %tobool, i32 %cttz, i32 32
ret i32 %s
+}
+define <2 x i32> @cttz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_select_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %Value, i1 true)
+ %s = select <2 x i1> %tobool, <2 x i32> %cttz, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %s
}
define i1 @overflow_div_add(i32 %v1, i32 %v2) nounwind {
diff --git a/test/Transforms/InstCombine/lshr.ll b/test/Transforms/InstCombine/lshr.ll
index 0cad7f833ab6..71b25177162b 100644
--- a/test/Transforms/InstCombine/lshr.ll
+++ b/test/Transforms/InstCombine/lshr.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n8:16:32:64"
+
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@@ -100,12 +102,9 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
ret <2 x i8> %lshr
}
-; FIXME: The bool bit got smeared across a wide val, but then we zero'd out those bits. This is just a zext.
-
define i16 @bool_zext(i1 %x) {
; CHECK-LABEL: @bool_zext(
-; CHECK-NEXT: [[SEXT:%.*]] = sext i1 %x to i16
-; CHECK-NEXT: [[HIBIT:%.*]] = lshr i16 [[SEXT]], 15
+; CHECK-NEXT: [[HIBIT:%.*]] = zext i1 %x to i16
; CHECK-NEXT: ret i16 [[HIBIT]]
;
%sext = sext i1 %x to i16
@@ -115,8 +114,7 @@ define i16 @bool_zext(i1 %x) {
define <2 x i8> @bool_zext_splat(<2 x i1> %x) {
; CHECK-LABEL: @bool_zext_splat(
-; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> %x to <2 x i8>
-; CHECK-NEXT: [[HIBIT:%.*]] = lshr <2 x i8> [[SEXT]], <i8 7, i8 7>
+; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i1> %x to <2 x i8>
; CHECK-NEXT: ret <2 x i8> [[HIBIT]]
;
%sext = sext <2 x i1> %x to <2 x i8>
@@ -148,23 +146,34 @@ define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) {
ret <2 x i8> %hibit
}
-; FIXME: All of the replicated sign bits are wiped out by the lshr. This could be lshr+zext.
-
-define i16 @fake_sext(i3 %x) {
+define i18 @fake_sext(i3 %x) {
; CHECK-LABEL: @fake_sext(
-; CHECK-NEXT: [[SEXT:%.*]] = sext i3 %x to i16
-; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[SEXT]], 15
-; CHECK-NEXT: ret i16 [[SH]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i3 %x, 2
+; CHECK-NEXT: [[SH:%.*]] = zext i3 [[TMP1]] to i18
+; CHECK-NEXT: ret i18 [[SH]]
;
- %sext = sext i3 %x to i16
- %sh = lshr i16 %sext, 15
- ret i16 %sh
+ %sext = sext i3 %x to i18
+ %sh = lshr i18 %sext, 17
+ ret i18 %sh
+}
+
+; Avoid the transform if it would change the shift from a legal to illegal type.
+
+define i32 @fake_sext_but_should_not_change_type(i3 %x) {
+; CHECK-LABEL: @fake_sext_but_should_not_change_type(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i3 %x to i32
+; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[SEXT]], 31
+; CHECK-NEXT: ret i32 [[SH]]
+;
+ %sext = sext i3 %x to i32
+ %sh = lshr i32 %sext, 31
+ ret i32 %sh
}
define <2 x i8> @fake_sext_splat(<2 x i3> %x) {
; CHECK-LABEL: @fake_sext_splat(
-; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i3> %x to <2 x i8>
-; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[SEXT]], <i8 7, i8 7>
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> %x, <i3 2, i3 2>
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i3> [[TMP1]] to <2 x i8>
; CHECK-NEXT: ret <2 x i8> [[SH]]
;
%sext = sext <2 x i3> %x to <2 x i8>
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 68daac65ee6b..c7d10e251b4a 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -199,6 +199,16 @@ define i256 @test_cttz() {
ret i256 %x
}
+declare <2 x i256> @llvm.cttz.v2i256(<2 x i256> %src, i1 %is_zero_undef)
+
+define <2 x i256> @test_cttz_vec() {
+; CHECK-LABEL: @test_cttz_vec(
+; CHECK-NEXT: ret <2 x i256> <i256 1, i256 1>
+;
+ %x = call <2 x i256> @llvm.cttz.v2i256(<2 x i256> <i256 10, i256 10>, i1 false)
+ ret <2 x i256> %x
+}
+
declare i256 @llvm.ctpop.i256(i256 %src)
define i256 @test_ctpop() {
@@ -410,3 +420,26 @@ define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) {
declare noalias i8* @malloc(i64)
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
+
+declare double @llvm.powi.f64(double, i32)
+declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32)
+
+define double @constant_fold_powi() nounwind uwtable ssp {
+; CHECK-LABEL: @constant_fold_powi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret double 9.000000e+00
+;
+entry:
+ %0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
+ ret double %0
+}
+
+define <2 x double> @constant_fold_powi_vec() nounwind uwtable ssp {
+; CHECK-LABEL: @constant_fold_powi_vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <2 x double> <double 9.000000e+00, double 2.500000e+01>
+;
+entry:
+ %0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
+ ret <2 x double> %0
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 20ebd36991a5..2fe079019161 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -69,7 +69,7 @@ define i1 @gep4() {
define i1 @PR31262() {
; CHECK-LABEL: @PR31262(
-; CHECK-NEXT: ret i1 icmp uge (i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 0, i64 undef), i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0))
+; CHECK-NEXT: ret i1 icmp uge (i32* getelementptr ([1 x i32], [1 x i32]* @a, i32 0, i32 undef), i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0))
;
%idx = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 undef
%cmp = icmp uge i32* %idx, getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0)
diff --git a/test/Transforms/InstSimplify/simplify-nested-bitcast.ll b/test/Transforms/InstSimplify/simplify-nested-bitcast.ll
new file mode 100644
index 000000000000..b7ee79415a22
--- /dev/null
+++ b/test/Transforms/InstSimplify/simplify-nested-bitcast.ll
@@ -0,0 +1,54 @@
+; RUN: opt -always-inline -S %s | FileCheck %s
+%0 = type { i64, i64, i8 addrspace(1)*, i8 addrspace(1)* }
+%__aaa_struct = type { { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }, %0, [17 x i8], { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }, %0, [18 x i8] }
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }
+
+@__aaa_struct_ptr = external addrspace(1) global %__aaa_struct
+@__aaa_const_init = constant %__aaa_struct { { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } { i8** null, i32 1342177280, i32 0, i8* bitcast (i32 (i8 addrspace(4)*, i32 addrspace(1)*)* @bl0_block_invoke to i8*), %struct.__block_descriptor addrspace(1)* bitcast (%0 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 1) to %struct.__block_descriptor addrspace(1)*) }, %0 { i64 0, i64 32, i8 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 2, i32 0), i8 addrspace(1)* null }, [17 x i8] c"bl0_block_invoke\00", { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } { i8** null, i32 1342177280, i32 0, i8* bitcast (i32 (i8 addrspace(4)*, i32 addrspace(1)*)* @__f1_block_invoke to i8*), %struct.__block_descriptor addrspace(1)* bitcast (%0 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 4) to %struct.__block_descriptor addrspace(1)*) }, %0 { i64 0, i64 32, i8 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 5, i32 0), i8 addrspace(1)* null }, [18 x i8] c"__f1_block_invoke\00" }
+
+; Function Attrs: alwaysinline norecurse nounwind readonly
+define i32 @bl0_block_invoke(i8 addrspace(4)* nocapture readnone, i32 addrspace(1)* nocapture readonly) #0 {
+entry:
+ %2 = load i32, i32 addrspace(1)* %1, align 4
+ %mul = shl nsw i32 %2, 1
+ ret i32 %mul
+}
+
+; Function Attrs: alwaysinline nounwind
+define i32 @f0(i32 addrspace(1)*, i32 (i32 addrspace(1)*) addrspace(4)*) #1 {
+entry:
+ %block.literal = bitcast i32 (i32 addrspace(1)*) addrspace(4)* %1 to %struct.__block_literal_generic addrspace(4)*
+ %2 = getelementptr inbounds %struct.__block_literal_generic, %struct.__block_literal_generic addrspace(4)* %block.literal, i64 0, i32 3
+ %3 = bitcast i32 (i32 addrspace(1)*) addrspace(4)* %1 to i8 addrspace(4)*
+ %4 = bitcast i8* addrspace(4)* %2 to i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)*
+ %5 = load i32 (i8 addrspace(4)*, i32 addrspace(1)*)*, i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)* %4, align 8
+ %call = tail call i32 %5(i8 addrspace(4)* %3, i32 addrspace(1)* %0) #2
+ ret i32 %call
+}
+
+; CHECK-LABEL: define void @f1
+; CHECK: %1 = load i32 (i8 addrspace(4)*, i32 addrspace(1)*)*, i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)* bitcast (i8* addrspace(4)* getelementptr (%__aaa_struct, %__aaa_struct addrspace(4)* addrspacecast (%__aaa_struct addrspace(1)* @__aaa_struct_ptr to %__aaa_struct addrspace(4)*), i64 0, i32 0, i32 3) to i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)*), align 8
+
+; Function Attrs: alwaysinline nounwind
+define void @f1(i32 addrspace(1)*) #1 {
+entry:
+ %call = tail call i32 @f0(i32 addrspace(1)* %0, i32 (i32 addrspace(1)*) addrspace(4)* addrspacecast (i32 (i32 addrspace(1)*) addrspace(1)* bitcast (%__aaa_struct addrspace(1)* @__aaa_struct_ptr to i32 (i32 addrspace(1)*) addrspace(1)*) to i32 (i32 addrspace(1)*) addrspace(4)*)) #3
+ store i32 %call, i32 addrspace(1)* %0, align 4
+ %call1 = tail call i32 @f0(i32 addrspace(1)* %0, i32 (i32 addrspace(1)*) addrspace(4)* addrspacecast (i32 (i32 addrspace(1)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 3) to i32 (i32 addrspace(1)*) addrspace(1)*) to i32 (i32 addrspace(1)*) addrspace(4)*)) #3
+ store i32 %call1, i32 addrspace(1)* %0, align 4
+ ret void
+}
+
+; Function Attrs: alwaysinline norecurse nounwind readonly
+define i32 @__f1_block_invoke(i8 addrspace(4)* nocapture readnone, i32 addrspace(1)* nocapture readonly) #0 {
+entry:
+ %2 = load i32, i32 addrspace(1)* %1, align 4
+ %add = add nsw i32 %2, 1
+ ret i32 %add
+}
+
+attributes #0 = { alwaysinline norecurse nounwind readonly }
+attributes #1 = { alwaysinline nounwind }
+attributes #2 = { nobuiltin nounwind }
+attributes #3 = { nobuiltin }
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index b8e61a05cc0c..cdf4732d4b5e 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -51,7 +51,7 @@ define <4 x i8*> @test5() {
ret <4 x i8*> %gep
; CHECK-LABEL: @test5
-; CHECK-NEXT: ret <4 x i8*> getelementptr (i8, <4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: ret <4 x i8*> getelementptr (i8, <4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
}
@v = global [24 x [42 x [3 x i32]]] zeroinitializer, align 16
diff --git a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
index bf2009e28a7d..1f444b3748a5 100644
--- a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -interleaved-access -S | FileCheck %s
-; This file tests the function `llvm::lowerInterleavedLoad`.
+; This file tests the function `llvm::lowerInterleavedLoad/Store`.
define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
; CHECK-LABEL: @load_factorf64_4(
@@ -102,4 +102,63 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
ret <4 x double> %mul
}
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+ ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; CHECK-LABEL: @store_factori64_4(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], <16 x i64>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+ ret void
+}
+
+define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_revMask(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+ ret void
+}
+
+define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
+; CHECK-NEXT: [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
new file mode 100644
index 000000000000..ec93847178b5
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
@@ -0,0 +1,452 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+;; memcpy.atomic formation (atomic load & store)
+define void @test1(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test1(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load atomic i8, i8* %I.0.014 unordered, align 1
+ store atomic i8 %V, i8* %DestI unordered, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation (atomic store, normal load)
+define void @test2(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load i8, i8* %I.0.014, align 1
+ store atomic i8 %V, i8* %DestI unordered, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store, normal load w/ no align)
+define void @test2b(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2b(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load i8, i8* %I.0.014
+ store atomic i8 %V, i8* %DestI unordered, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
+define void @test2c(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2c(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i32, i32 10000
+ %Dest = alloca i32, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load i32, i32* %I.0.014, align 2
+ store atomic i32 %V, i32* %DestI unordered, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
+define void @test2d(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2d(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i32, i32 10000
+ %Dest = alloca i32, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load i32, i32* %I.0.014, align 4
+ store atomic i32 %V, i32* %DestI unordered, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; memcpy.atomic formation (normal store, atomic load)
+define void @test3(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load atomic i8, i8* %I.0.014 unordered, align 1
+ store i8 %V, i8* %DestI, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (normal store w/ no align, atomic load)
+define void @test3b(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3b(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load atomic i8, i8* %I.0.014 unordered, align 1
+ store i8 %V, i8* %DestI
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
+define void @test3c(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3c(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i32, i32 10000
+ %Dest = alloca i32, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load atomic i32, i32* %I.0.014 unordered, align 2
+ store i32 %V, i32* %DestI, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
+define void @test3d(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3d(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i32, i32 10000
+ %Dest = alloca i32, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load atomic i32, i32* %I.0.014 unordered, align 4
+ store i32 %V, i32* %DestI, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
+define void @test4(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test4(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load atomic i8, i8* %I.0.014 unordered, align 1
+ store atomic i8 %V, i8* %DestI monotonic, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
+define void @test5(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test5(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load atomic i8, i8* %I.0.014 monotonic, align 1
+ store atomic i8 %V, i8* %DestI unordered, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 2
+define void @test6(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test6(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 %Size, i32 2)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i16, i32 10000
+ %Dest = alloca i16, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
+ %DestI = getelementptr i16, i16* %Dest, i64 %indvar
+ %V = load atomic i16, i16* %I.0.014 unordered, align 2
+ store atomic i16 %V, i16* %DestI unordered, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 4
+define void @test7(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test7(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 %Size, i32 4)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i32, i32 10000
+ %Dest = alloca i32, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load atomic i32, i32* %I.0.014 unordered, align 4
+ store atomic i32 %V, i32* %DestI unordered, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 8
+define void @test8(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test8(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 %Size, i32 8)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i64, i32 10000
+ %Dest = alloca i64, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i64, i64* %Base, i64 %indvar
+ %DestI = getelementptr i64, i64* %Dest, i64 %indvar
+ %V = load atomic i64, i64* %I.0.014 unordered, align 8
+ store atomic i64 %V, i64* %DestI unordered, align 8
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 16
+define void @test9(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test9(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 %Size, i32 16)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i128, i32 10000
+ %Dest = alloca i128, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i128, i128* %Base, i64 %indvar
+ %DestI = getelementptr i128, i128* %Dest, i64 %indvar
+ %V = load atomic i128, i128* %I.0.014 unordered, align 16
+ store atomic i128 %V, i128* %DestI unordered, align 16
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
+define void @test10(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test10(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i256, i32 10000
+ %Dest = alloca i256, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i256, i256* %Base, i64 %indvar
+ %DestI = getelementptr i256, i256* %Dest, i64 %indvar
+ %V = load atomic i256, i256* %I.0.014 unordered, align 32
+ store atomic i256 %V, i256* %DestI unordered, align 32
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; Make sure that atomic memset doesn't get recognized by mistake
+define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test_nomemset(
+; CHECK-NOT: call void @llvm.memset
+; CHECK: store
+; CHECK: ret void
+bb.nph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ store atomic i8 0, i8* %I.0.014 unordered, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Verify that unordered memset_pattern isn't recognized.
+; This is a replica of test11_pattern from basic.ll
+define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp {
+; CHECK-LABEL: @test_nomemset_pattern(
+; CHECK-NEXT: entry:
+; CHECK-NOT: bitcast
+; CHECK-NOT: memset_pattern
+; CHECK: store atomic
+; CHECK: ret void
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+ %arrayidx = getelementptr i32, i32* %P, i64 %indvar
+ store atomic i32 1, i32* %arrayidx unordered, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
new file mode 100644
index 000000000000..b2528f1c2457
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
@@ -0,0 +1,28 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+;; memcpy.atomic formation (atomic load & store) -- element size 2
+;; Will not create call due to a max element size of 0
+define void @test1(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+ %Base = alloca i16, i32 10000
+ %Dest = alloca i16, i32 10000
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
+ %DestI = getelementptr i16, i16* %Dest, i64 %indvar
+ %V = load atomic i16, i16* %I.0.014 unordered, align 2
+ store atomic i16 %V, i16* %DestI unordered, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/canonical.ll b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
index 2dafbb408aad..6b6acb868745 100644
--- a/test/Transforms/LoopStrengthReduce/X86/canonical.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
; Check LSR formula canonicalization will put loop invariant regs before
; induction variable of current loop, so exprs involving loop invariant regs
; can be promoted outside of current loop.
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index fb63b66137f3..7c01432914ff 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -163,7 +163,7 @@ for.end: ; preds = %for.body, %entry
; X64: movzbl -3(
;
; X32: foldedidx:
-; X32: movzbl -3(
+; X32: movzbl 400(
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
entry:
br label %for.body
@@ -275,7 +275,7 @@ exit:
;
; X32: @testCmpZero
; X32: %for.body82.us
-; X32: dec
+; X32: cmp
; X32: jne
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index a7731bfcec56..deca954fea78 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Strength reduction analysis here relies on IV Users analysis, that
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
-; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
-; CHECK: %1 = sub i32 0, %sub.us
-; CHECK: %2 = add i32 %1, %lsr.iv.next
-; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %0 = sub i32 0, %sub.us
+; CHECK: %1 = sub i32 %0, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
index 4888536bdf81..7f163500a737 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; OPT test checks that LSR optimize compare for static counter to compare with 0.
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
index 3273cb4e6b5b..239cc0233506 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; OPT checks that LSR prefers less instructions to less registers.
diff --git a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
index b563eb3ad994..e05d5aa3027b 100644
--- a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Check when we use an outerloop induction variable inside of an innerloop
; induction value expr, LSR can still choose to use single induction variable
@@ -22,18 +23,21 @@ for.body: ; preds = %for.inc, %entry
for.body2.preheader: ; preds = %for.body
br label %for.body2
-; Check LSR only generates one induction variable for for.body2 and the induction
-; variable will be shared by multiple array accesses.
+; Check LSR only generates two induction variables for for.body2: one for the compare and
+; one to be shared by multiple array accesses.
; CHECK: for.body2:
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ 0, %for.body2.preheader ]
+; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
+; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* %maxarray, i64 [[LSR]]
-; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* [[SCEVGEP1]], i64 1
+; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
+; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
+; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
+; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP3]], align 1
-; CHECK: [[SCEVGEP4:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK: store i8 {{.*}}, i8* [[SCEVGEP4]], align 1
+; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
+; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
+; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
+; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index d06e3fdba39c..1149afe7b9f4 100644
--- a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -5,7 +5,7 @@ target triple = "aarch64"
; CHECK-LABEL: @add_a(
; CHECK: load <16 x i8>, <16 x i8>*
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
; CHECK: store <16 x i8>
; Function Attrs: nounwind
define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
@@ -31,9 +31,37 @@ for.body: ; preds = %entry, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
+; working with.
+; CHECK-LABEL: @add_a1(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0 ; loop is entered only when %len > 0 (signed compare)
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %add = add nuw nsw i8 %0, 2 ; add is written at i8 width with no zext/trunc around it; the test expects nuw/nsw to survive on the <16 x i8> add
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len ; leave the loop once the truncated next index equals %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
; CHECK-LABEL: @add_b(
; CHECK: load <8 x i16>, <8 x i16>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -61,7 +89,7 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: @add_c(
; CHECK: load <8 x i8>, <8 x i8>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
; CHECK: store <8 x i16>
; Function Attrs: nounwind
define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -116,12 +144,12 @@ for.body: ; preds = %entry, %for.body
; CHECK-LABEL: @add_e(
; CHECK: load <16 x i8>
; CHECK: shl <16 x i8>
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
; CHECK: or <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
; CHECK: and <16 x i8>
; CHECK: xor <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
; CHECK: store <16 x i8>
define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
@@ -162,12 +190,12 @@ for.body: ; preds = %for.body, %for.body
; CHECK: load <8 x i16>
; CHECK: trunc <8 x i16>
; CHECK: shl <8 x i8>
-; CHECK: add nsw <8 x i8>
+; CHECK: add <8 x i8>
; CHECK: or <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
; CHECK: and <8 x i8>
; CHECK: xor <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
; CHECK: store <8 x i8>
define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
entry:
diff --git a/test/Transforms/LowerExpectIntrinsic/PR33346.ll b/test/Transforms/LowerExpectIntrinsic/PR33346.ll
new file mode 100644
index 000000000000..ca962fbdc8f3
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/PR33346.ll
@@ -0,0 +1,22 @@
+; RUN: opt -lower-expect -S < %s
+; RUN: opt -passes='function(lower-expect)' -S < %s
+
+define i64 @foo(i64 %arg) #0 {
+bb:
+ %tmp = alloca i64, align 8
+ store i64 %arg, i64* %tmp, align 8
+ %tmp1 = load i64, i64* %tmp, align 8 ; first load of the stored argument
+ %tmp2 = load i64, i64* %tmp, align 8 ; second load of the same slot
+ %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2) ; both operands are loaded values, i.e. the expected value is not a constant
+ ret i64 %tmp3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304723)"}
diff --git a/test/Transforms/LowerTypeTests/simple.ll b/test/Transforms/LowerTypeTests/simple.ll
index cedfcb4a63a0..aae17c05d606 100644
--- a/test/Transforms/LowerTypeTests/simple.ll
+++ b/test/Transforms/LowerTypeTests/simple.ll
@@ -1,5 +1,5 @@
; RUN: opt -S -lowertypetests < %s | FileCheck %s
-; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck -check-prefix=CHECK-DARWIN %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck %s
; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s
target datalayout = "e-p:32:32"
@@ -39,20 +39,6 @@ target datalayout = "e-p:32:32"
; CHECK: @c = protected alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
; CHECK: @d = alias [2 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
-; CHECK-DARWIN: @aptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G:@[^ ]*]], i32 0, i32 0)
-@aptr = constant i32* @a
-
-; CHECK-DARWIN: @bptr = constant [63 x i32]* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
-@bptr = constant [63 x i32]* @b
-
-; CHECK-DARWIN: @cptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
-@cptr = constant i32* @c
-
-; CHECK-DARWIN: @dptr = constant [2 x i32]* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
-@dptr = constant [2 x i32]* @d
-
-; CHECK-DARWIN: [[G]] = private constant
-
; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
diff --git a/test/Transforms/LowerTypeTests/simplify.ll b/test/Transforms/LowerTypeTests/simplify.ll
new file mode 100644
index 000000000000..cb5ad4a10bfb
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/simplify.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: define i1 @bytearray7(i8* [[p:%.*]])
+define i1 @bytearray7(i8* %p) {
+ ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+ ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint (i8* @__typeid_bytearray7_global_addr to i64)
+ ; CHECK-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint (i8* @__typeid_bytearray7_align to i8) to i64)
+ ; CHECK-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint (i8* @__typeid_bytearray7_align to i8)) to i64)
+ ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+ ; CHECK-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint (i8* @__typeid_bytearray7_size_m1 to i64)
+ ; CHECK-NEXT: br i1 [[ule]], label %[[t1:.*]], label %[[f:.*]]
+
+ ; CHECK: [[t1]]:
+ ; CHECK-NEXT: [[gep:%.*]] = getelementptr i8, i8* @__typeid_bytearray7_byte_array, i64 [[or]]
+ ; CHECK-NEXT: [[load:%.*]] = load i8, i8* [[gep]]
+ ; CHECK-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint (i8* @__typeid_bytearray7_bit_mask to i8)
+ ; CHECK-NEXT: [[ne:%.*]] = icmp ne i8 [[and]], 0
+ ; CHECK-NEXT: br i1 [[ne]], label %[[t:.*]], label %[[f:.*]]
+ ; NOTE(review): [[f]] is captured a second time by the br pattern just above, so the [[f]] label pattern below only ties to that second capture - confirm this is intended.
+ ; CHECK: [[t]]:
+ ; CHECK-NEXT: ret i1 true
+
+ ; CHECK: [[f]]:
+ ; CHECK-NEXT: ret i1 false
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray7") ; the only computation in the input; the patterns above describe its expected lowering
+ br i1 %x, label %t, label %f ; the type-test result directly selects the true/false return blocks
+
+t:
+ ret i1 true
+
+f:
+ ret i1 false
+}
diff --git a/test/Transforms/NewGVN/completeness.ll b/test/Transforms/NewGVN/completeness.ll
index 2b28f12df9d1..1798bfea5fe0 100644
--- a/test/Transforms/NewGVN/completeness.ll
+++ b/test/Transforms/NewGVN/completeness.ll
@@ -395,7 +395,7 @@ define void @test10() {
; CHECK: g:
; CHECK-NEXT: [[N:%.*]] = phi i32* [ [[H:%.*]], [[I:%.*]] ], [ null, [[B:%.*]] ]
; CHECK-NEXT: [[H]] = getelementptr i32, i32* [[N]], i64 1
-; CHECK-NEXT: [[J:%.*]] = icmp eq i32* [[H]], getelementptr (i32, i32* null, i64 8)
+; CHECK-NEXT: [[J:%.*]] = icmp eq i32* [[H]], inttoptr (i64 32 to i32*)
; CHECK-NEXT: br i1 [[J]], label [[C:%.*]], label [[I]]
; CHECK: i:
; CHECK-NEXT: br i1 undef, label [[K:%.*]], label [[G]]
diff --git a/test/Transforms/NewGVN/loadforward.ll b/test/Transforms/NewGVN/loadforward.ll
index d66b5332601f..b4cbcc6b0f4d 100644
--- a/test/Transforms/NewGVN/loadforward.ll
+++ b/test/Transforms/NewGVN/loadforward.ll
@@ -9,8 +9,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;; Test that we forward the first store to the second load
define i16 @bazinga() {
; CHECK-LABEL: @bazinga(
-; CHECK-NEXT: [[_TMP10:%.*]] = load i16, i16* getelementptr inbounds (%rec11, %rec11* @str, i16 0, i32 1)
-; CHECK-NEXT: store i16 [[_TMP10]], i16* getelementptr inbounds (%rec11, %rec11* @str, i16 0, i32 0)
+; CHECK-NEXT: [[_TMP10:%.*]] = load i16, i16* getelementptr inbounds (%rec11, %rec11* @str, i64 0, i32 1)
+; CHECK-NEXT: store i16 [[_TMP10]], i16* getelementptr inbounds (%rec11, %rec11* @str, i64 0, i32 0)
; CHECK-NEXT: [[_TMP15:%.*]] = icmp eq i16 [[_TMP10]], 3
; CHECK-NEXT: [[_TMP16:%.*]] = select i1 [[_TMP15]], i16 1, i16 0
; CHECK-NEXT: br label [[BB1:%.*]]
diff --git a/test/Transforms/NewGVN/pr32403.ll b/test/Transforms/NewGVN/pr32403.ll
index 505d31a9463e..2552e0e66ab9 100644
--- a/test/Transforms/NewGVN/pr32403.ll
+++ b/test/Transforms/NewGVN/pr32403.ll
@@ -17,8 +17,7 @@ define void @reorder_ref_pic_list() local_unnamed_addr {
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[REFIDXLX_0]], 1
; CHECK-NEXT: br label [[FOR_BODY8_I:%.*]]
; CHECK: for.body8.i:
-; CHECK-NEXT: [[NIDX_052_I:%.*]] = phi i32 [ [[INC_I]], [[IF_THEN13]] ], [ [[NIDX_052_I]], [[FOR_INC24_I:%.*]] ]
-; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I]], label [[IF_THEN17_I:%.*]]
+; CHECK-NEXT: br i1 undef, label [[FOR_INC24_I:%.*]], label [[IF_THEN17_I:%.*]]
; CHECK: if.then17.i:
; CHECK-NEXT: br label [[FOR_INC24_I]]
; CHECK: for.inc24.i:
diff --git a/test/Transforms/NewGVN/pr32897.ll b/test/Transforms/NewGVN/pr32897.ll
index eb19aa367b72..dcf2af30b239 100644
--- a/test/Transforms/NewGVN/pr32897.ll
+++ b/test/Transforms/NewGVN/pr32897.ll
@@ -7,7 +7,6 @@ define void @tinkywinky(i64* %b) {
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: store i64 undef, i64* [[B:%.*]]
-; CHECK-NEXT: [[B2:%.*]] = load i64, i64* [[B]]
; CHECK-NEXT: br i1 undef, label [[BODY]], label [[END:%.*]]
; CHECK: end:
; CHECK-NEXT: br label [[BODY]]
diff --git a/test/Transforms/NewGVN/pr33187.ll b/test/Transforms/NewGVN/pr33187.ll
new file mode 100644
index 000000000000..61e767d36569
--- /dev/null
+++ b/test/Transforms/NewGVN/pr33187.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;; Ensure we don't change after value numbering by accidentally deleting the wrong expression.
+; RUN: opt -newgvn -S %s | FileCheck %s
+define void @fn1() local_unnamed_addr #0 {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND_PREHEADER:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: br label [[FOR_COND_PREHEADER]]
+; CHECK: for.cond.preheader:
+; CHECK-NEXT: [[H_031:%.*]] = phi i32 [ 5, [[ENTRY:%.*]] ], [ [[H_127:%.*]], [[WHILE_COND:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[H_128:%.*]] = phi i32 [ [[H_031]], [[FOR_COND_PREHEADER]] ], [ [[H_2:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: br label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: br i1 false, label [[L_LOOPEXIT:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: br i1 undef, label [[FOR_INC]], label [[IF_END9:%.*]]
+; CHECK: if.end9:
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[H_2]] = phi i32 [ [[H_128]], [[IF_END]] ], [ 0, [[IF_END9]] ]
+; CHECK-NEXT: br i1 undef, label [[WHILE_COND10_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: while.cond10.loopexit:
+; CHECK-NEXT: br label [[WHILE_COND10:%.*]]
+; CHECK: while.cond10:
+; CHECK-NEXT: [[H_127]] = phi i32 [ [[H_126:%.*]], [[IF_END18:%.*]] ], [ [[H_125:%.*]], [[L:%.*]] ], [ [[H_2]], [[WHILE_COND10_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 undef, label [[WHILE_COND]], label [[WHILE_BODY12:%.*]]
+; CHECK: while.body12:
+; CHECK-NEXT: br i1 undef, label [[IF_END18]], label [[L]]
+; CHECK: L.loopexit:
+; CHECK-NEXT: store i8 undef, i8* null
+; CHECK-NEXT: br label [[L]]
+; CHECK: L:
+; CHECK-NEXT: [[H_125]] = phi i32 [ [[H_127]], [[WHILE_BODY12]] ], [ undef, [[L_LOOPEXIT]] ]
+; CHECK-NEXT: br i1 undef, label [[WHILE_COND10]], label [[IF_END18]]
+; CHECK: if.end18:
+; CHECK-NEXT: [[H_126]] = phi i32 [ [[H_125]], [[L]] ], [ [[H_127]], [[WHILE_BODY12]] ]
+; CHECK-NEXT: br label [[WHILE_COND10]]
+;
+entry:
+ br label %for.cond.preheader
+
+while.cond: ; preds = %while.cond10
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %while.cond, %entry
+ %h.031 = phi i32 [ 5, %entry ], [ %h.127, %while.cond ]
+ br label %for.body
+
+for.body: ; preds = %for.inc, %for.cond.preheader
+ %h.128 = phi i32 [ %h.031, %for.cond.preheader ], [ %h.2, %for.inc ]
+ br label %if.then
+
+if.then: ; preds = %for.body
+ br i1 false, label %L.loopexit, label %if.end ; constant-false condition: the %L.loopexit edge is never taken
+
+if.end: ; preds = %if.then
+ br i1 undef, label %for.inc, label %if.end9
+
+if.end9: ; preds = %if.end
+ br label %for.inc
+
+for.inc: ; preds = %if.end9, %if.end
+ %h.2 = phi i32 [ %h.128, %if.end ], [ 0, %if.end9 ]
+ br i1 undef, label %while.cond10.loopexit, label %for.body
+
+while.cond10.loopexit: ; preds = %for.inc
+ %h.2.lcssa = phi i32 [ %h.2, %for.inc ] ; single-entry phi; the expected output has no phi here and uses %h.2 directly in while.cond10
+ br label %while.cond10
+
+while.cond10: ; preds = %if.end18, %L, %while.cond10.loopexit
+ %h.127 = phi i32 [ %h.126, %if.end18 ], [ %h.125, %L ], [ %h.2.lcssa, %while.cond10.loopexit ]
+ br i1 undef, label %while.cond, label %while.body12
+
+while.body12: ; preds = %while.cond10
+ br i1 undef, label %if.end18, label %L
+
+L.loopexit: ; preds = %if.then
+ br label %L ; expected output additionally has a `store i8 undef, i8* null` in this block (presumably because it is unreachable - confirm)
+
+L: ; preds = %L.loopexit, %while.body12
+ %h.125 = phi i32 [ %h.127, %while.body12 ], [ undef, %L.loopexit ]
+ br i1 undef, label %while.cond10, label %if.end18
+
+if.end18: ; preds = %L, %while.body12
+ %h.126 = phi i32 [ %h.125, %L ], [ %h.127, %while.body12 ]
+ br label %while.cond10
+}
+
+
+define void @hoge() local_unnamed_addr #0 {
+; CHECK-LABEL: @hoge(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP2:%.*]], [[BB1]] ]
+; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[TMP]], 1
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp2, %bb1 ] ; counter that has no use outside its own increment
+ %tmp2 = add nuw nsw i64 %tmp, 1 ; the expected output keeps both the phi and this add unchanged
+ br label %bb1 ; bb1 branches only to itself; the function has no return
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+source_filename = "pr33187-c.ll"
+
+define void @a() {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: b:
+; CHECK-NEXT: store i8* null, i8** null
+; CHECK-NEXT: br label [[D:%.*]]
+; CHECK: d:
+; CHECK-NEXT: [[I:%.*]] = phi i8* [ null, [[B:%.*]] ], [ [[E:%.*]], [[F:%.*]] ]
+; CHECK-NEXT: br i1 undef, label [[F]], label [[G:%.*]]
+; CHECK: g:
+; CHECK-NEXT: store i8* [[I]], i8** null
+; CHECK-NEXT: unreachable
+; CHECK: f:
+; CHECK-NEXT: [[E]] = getelementptr i8, i8* [[I]], i64 1
+; CHECK-NEXT: br label [[D]]
+;
+b:
+ store i8* null, i8** null
+ br label %d
+
+d: ; preds = %f, %b
+ %i = phi i8* [ null, %b ], [ %e, %f ] ; pointer that starts at null and advances by one byte per trip through %f
+ br i1 undef, label %f, label %g
+
+g: ; preds = %d
+ %h = phi i8* [ %i, %d ] ; single-entry phi; the expected output drops it and stores %i directly
+ store i8* %h, i8** null
+ unreachable
+
+f: ; preds = %d
+ %e = getelementptr i8, i8* %i, i64 1
+ br label %d
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
index 7eec13e535d4..e00ed849ee4b 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-fp.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
@@ -10,7 +10,7 @@
define <2 x double> @buildvector_add_2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @buildvector_add_2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -30,7 +30,7 @@ define <2 x double> @buildvector_add_2f64(<2 x double> %a, <2 x double> %b) {
define <2 x double> @buildvector_sub_2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @buildvector_sub_2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -50,7 +50,7 @@ define <2 x double> @buildvector_sub_2f64(<2 x double> %a, <2 x double> %b) {
define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @buildvector_mul_2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -70,7 +70,7 @@ define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) {
define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @buildvector_div_2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -90,7 +90,7 @@ define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) {
define <4 x float> @buildvector_add_4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @buildvector_add_4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -122,7 +122,7 @@ define <4 x float> @buildvector_add_4f32(<4 x float> %a, <4 x float> %b) {
define <4 x float> @buildvector_sub_4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @buildvector_sub_4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -154,7 +154,7 @@ define <4 x float> @buildvector_sub_4f32(<4 x float> %a, <4 x float> %b) {
define <4 x float> @buildvector_mul_4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @buildvector_mul_4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -186,7 +186,7 @@ define <4 x float> @buildvector_mul_4f32(<4 x float> %a, <4 x float> %b) {
define <4 x float> @buildvector_div_4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @buildvector_div_4f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -222,7 +222,7 @@ define <4 x float> @buildvector_div_4f32(<4 x float> %a, <4 x float> %b) {
define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @buildvector_add_4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -254,7 +254,7 @@ define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) {
define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @buildvector_sub_4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -286,7 +286,7 @@ define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) {
define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @buildvector_mul_4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -318,7 +318,7 @@ define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @buildvector_div_4f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -350,7 +350,7 @@ define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @buildvector_add_8f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -406,7 +406,7 @@ define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) {
define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @buildvector_sub_8f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -462,7 +462,7 @@ define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) {
define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @buildvector_mul_8f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -518,7 +518,7 @@ define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) {
define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @buildvector_div_8f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -578,7 +578,7 @@ define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) {
define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: @buildvector_add_8f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -634,7 +634,7 @@ define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) {
define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: @buildvector_sub_8f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -690,7 +690,7 @@ define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) {
define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: @buildvector_mul_8f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -746,7 +746,7 @@ define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: @buildvector_div_8f64(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -802,7 +802,7 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: @buildvector_add_16f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -906,7 +906,7 @@ define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) {
define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: @buildvector_sub_16f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -1010,7 +1010,7 @@ define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) {
define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: @buildvector_mul_16f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -1114,7 +1114,7 @@ define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) {
define <16 x float> @buildvector_div_16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: @buildvector_div_16f32(
-; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll b/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
new file mode 100644
index 000000000000..4c8748e220fd
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
+
+define float @dotf(<4 x float> %x, <4 x float> %y) { ; sum of x[i]*y[i] over the 4 lanes, written as scalar extract/fmul/fadd
+; CHECK-LABEL: @dotf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret float [[TMP2]]
+;
+entry: ; expected result (assertions above): one <4 x float> fmul, a lane-reversing shuffle, then a shuffle+fadd reduction
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecext1 = extractelement <4 x float> %y, i32 0
+ %mul = fmul fast float %vecext, %vecext1
+ %vecext.1 = extractelement <4 x float> %x, i32 1
+ %vecext1.1 = extractelement <4 x float> %y, i32 1
+ %mul.1 = fmul fast float %vecext.1, %vecext1.1
+ %add.1 = fadd fast float %mul.1, %mul ; running sum: each %add.N adds lane N's product to the previous total
+ %vecext.2 = extractelement <4 x float> %x, i32 2
+ %vecext1.2 = extractelement <4 x float> %y, i32 2
+ %mul.2 = fmul fast float %vecext.2, %vecext1.2
+ %add.2 = fadd fast float %mul.2, %add.1
+ %vecext.3 = extractelement <4 x float> %x, i32 3
+ %vecext1.3 = extractelement <4 x float> %y, i32 3
+ %mul.3 = fmul fast float %vecext.3, %vecext1.3
+ %add.3 = fadd fast float %mul.3, %add.2
+ ret float %add.3
+}
+
+define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double>* byval nocapture readonly align 32) { ; 4-lane double dot product; both operands arrive as unnamed byval pointers (%0, %1)
+; CHECK-LABEL: @dotd(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
+; CHECK-NEXT: [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+entry: ; expected result (assertions above): the two vector loads are kept and feed one <4 x double> fmul plus a shuffle+fadd reduction
+ %x = load <4 x double>, <4 x double>* %0, align 32 ; %0 is the first unnamed argument
+ %y = load <4 x double>, <4 x double>* %1, align 32 ; %1 is the second unnamed argument
+ %vecext = extractelement <4 x double> %x, i32 0
+ %vecext1 = extractelement <4 x double> %y, i32 0
+ %mul = fmul fast double %vecext, %vecext1
+ %vecext.1 = extractelement <4 x double> %x, i32 1
+ %vecext1.1 = extractelement <4 x double> %y, i32 1
+ %mul.1 = fmul fast double %vecext.1, %vecext1.1
+ %add.1 = fadd fast double %mul.1, %mul ; running sum: each %add.N adds lane N's product to the previous total
+ %vecext.2 = extractelement <4 x double> %x, i32 2
+ %vecext1.2 = extractelement <4 x double> %y, i32 2
+ %mul.2 = fmul fast double %vecext.2, %vecext1.2
+ %add.2 = fadd fast double %mul.2, %add.1
+ %vecext.3 = extractelement <4 x double> %x, i32 3
+ %vecext1.3 = extractelement <4 x double> %y, i32 3
+ %mul.3 = fmul fast double %vecext.3, %vecext1.3
+ %add.3 = fadd fast double %mul.3, %add.2
+ ret double %add.3
+}
+
+define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) { ; 4-lane float dot product of the vectors stored at %x and %y
+; CHECK-LABEL: @dotfq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret float [[TMP4]]
+;
+entry: ; expected result (assertions above): one <4 x float> fmul with the %y vector as first operand, then a shuffle+fadd reduction
+ %0 = load <4 x float>, <4 x float>* %x, align 16
+ %1 = load <4 x float>, <4 x float>* %y, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ %vecext1 = extractelement <4 x float> %1, i32 0
+ %mul = fmul fast float %vecext1, %vecext ; operand order is (%y lane, %x lane) in every product of this function
+ %vecext.1 = extractelement <4 x float> %0, i32 1
+ %vecext1.1 = extractelement <4 x float> %1, i32 1
+ %mul.1 = fmul fast float %vecext1.1, %vecext.1
+ %add.1 = fadd fast float %mul.1, %mul ; running sum: each %add.N adds lane N's product to the previous total
+ %vecext.2 = extractelement <4 x float> %0, i32 2
+ %vecext1.2 = extractelement <4 x float> %1, i32 2
+ %mul.2 = fmul fast float %vecext1.2, %vecext.2
+ %add.2 = fadd fast float %mul.2, %add.1
+ %vecext.3 = extractelement <4 x float> %0, i32 3
+ %vecext1.3 = extractelement <4 x float> %1, i32 3
+ %mul.3 = fmul fast float %vecext1.3, %vecext.3
+ %add.3 = fadd fast float %mul.3, %add.2
+ ret float %add.3
+}
+
+define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) { ; 4-lane double dot product of the vectors stored at %x and %y
+; CHECK-LABEL: @dotdq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+entry: ; expected result (assertions above): one <4 x double> fmul with the %y vector as first operand, then a shuffle+fadd reduction
+ %0 = load <4 x double>, <4 x double>* %x, align 32
+ %1 = load <4 x double>, <4 x double>* %y, align 32
+ %vecext = extractelement <4 x double> %0, i32 0
+ %vecext1 = extractelement <4 x double> %1, i32 0
+ %mul = fmul fast double %vecext1, %vecext ; operand order is (%y lane, %x lane) in every product of this function
+ %vecext.1 = extractelement <4 x double> %0, i32 1
+ %vecext1.1 = extractelement <4 x double> %1, i32 1
+ %mul.1 = fmul fast double %vecext1.1, %vecext.1
+ %add.1 = fadd fast double %mul.1, %mul ; running sum: each %add.N adds lane N's product to the previous total
+ %vecext.2 = extractelement <4 x double> %0, i32 2
+ %vecext1.2 = extractelement <4 x double> %1, i32 2
+ %mul.2 = fmul fast double %vecext1.2, %vecext.2
+ %add.2 = fadd fast double %mul.2, %add.1
+ %vecext.3 = extractelement <4 x double> %0, i32 3
+ %vecext1.3 = extractelement <4 x double> %1, i32 3
+ %mul.3 = fmul fast double %vecext1.3, %vecext.3
+ %add.3 = fadd fast double %mul.3, %add.2
+ ret double %add.3
+}
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll
index 8fba30c2720f..a54a3afc79f9 100644
--- a/test/Transforms/SROA/address-spaces.ll
+++ b/test/Transforms/SROA/address-spaces.ll
@@ -101,3 +101,31 @@ entry:
%ret = fadd float %f1, %f2
ret float %ret
}
+
+; Test load from and store to non-zero address space.
+define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
+; CHECK-LABEL: @test_load_store_diff_addr_space
+; CHECK-NOT: alloca
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+ %a = alloca i64 ; stack temporary; the checks above require it to be gone from the output
+ %a.cast = bitcast i64* %a to [2 x float]*
+ %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0 ; element 0 of the temporary viewed as [2 x float]
+ %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1 ; element 1 of the temporary viewed as [2 x float]
+ %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
+ %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
+ %v1 = load i64, i64 addrspace(1)* %p1 ; read both floats of %complex1 as a single i64 from addrspace(1)
+ store i64 %v1, i64* %a
+ %f1 = load float, float* %a.gep1
+ %f2 = load float, float* %a.gep2
+ %sum = fadd float %f1, %f2
+ store float %sum, float* %a.gep1 ; both elements of the temporary are overwritten with the sum
+ store float %sum, float* %a.gep2
+ %v2 = load i64, i64* %a
+ %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
+ %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
+ store i64 %v2, i64 addrspace(1)* %p2 ; write the temporary back to %complex2 as a single i64 in addrspace(1)
+ ret void
+}
diff --git a/test/Transforms/SampleProfile/Inputs/indirect-call.prof b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
index ff7be5df977a..ff1368142a0d 100644
--- a/test/Transforms/SampleProfile/Inputs/indirect-call.prof
+++ b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
@@ -17,3 +17,6 @@ test_inline_strip:3000:0
test_inline_strip_conflict:3000:0
1: foo_inline_strip_conflict:3000
1: 3000
+test_norecursive_inline:3000:0
+ 1: test_norecursive_inline:3000
+ 20: 3000
diff --git a/test/Transforms/SampleProfile/indirect-call.ll b/test/Transforms/SampleProfile/indirect-call.ll
index 4101f6f492e5..bee98f1066d2 100644
--- a/test/Transforms/SampleProfile/indirect-call.ll
+++ b/test/Transforms/SampleProfile/indirect-call.ll
@@ -69,7 +69,18 @@ define void @test_noinline(void ()*) !dbg !12 {
ret void
}
+; CHECK-LABEL: @test_norecursive_inline
+; If the indirect call target is the caller, we should not promote it.
+define void @test_norecursive_inline() !dbg !24 {
+; CHECK-NOT: icmp
+; CHECK: call
+ %1 = load void ()*, void ()** @y, align 8 ; the callee is read from the global function pointer @y
+ call void %1(), !dbg !25 ; indirect call; !25 is line 13 in this function's scope (!24, declared at line 12)
+ ret void
+}
+
@x = global i32 0, align 4
+@y = global void ()* null, align 8
define i32* @foo_inline1(i32* %x) !dbg !14 {
ret i32* %x
@@ -142,3 +153,5 @@ define void @test_direct() !dbg !22 {
!21 = distinct !DISubprogram(name: "foo_direct", scope: !1, file: !1, line: 21, unit: !0)
!22 = distinct !DISubprogram(name: "test_direct", scope: !1, file: !1, line: 22, unit: !0)
!23 = !DILocation(line: 23, scope: !22)
+!24 = distinct !DISubprogram(name: "test_norecursive_inline", scope: !1, file: !1, line: 12, unit: !0)
+!25 = !DILocation(line: 13, scope: !24)
diff --git a/test/Transforms/Sink/badloadsink.ll b/test/Transforms/Sink/badloadsink.ll
new file mode 100644
index 000000000000..e3f4884c5a40
--- /dev/null
+++ b/test/Transforms/Sink/badloadsink.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -basicaa -sink -S | FileCheck %s
+declare void @foo(i64 *)
+define i64 @sinkload(i1 %cmp) { ; the load of %a must stay in 'top': block A overwrites %a on one path to B
+; CHECK-LABEL: @sinkload
+top:
+ %a = alloca i64
+; CHECK: call void @foo(i64* %a)
+; CHECK-NEXT: %x = load i64, i64* %a
+ call void @foo(i64* %a)
+ %x = load i64, i64* %a
+ br i1 %cmp, label %A, label %B
+A:
+ store i64 0, i64 *%a ; clobbers %a; a load sunk into B would observe this 0 instead of the value read in 'top'
+ br label %B
+B:
+; CHECK-NOT: load i64, i64* %a
+ ret i64 %x
+}
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split.ll b/test/Transforms/ThinLTOBitcodeWriter/split.ll
index d37d10bd3560..8bf3a18cd7f9 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split.ll
@@ -25,6 +25,9 @@
; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s)
; BCA0: <GLOBALVAL_SUMMARY_BLOCK
+; BCA1: <FULL_LTO_GLOBALVAL_SUMMARY_BLOCK
+; 16 = not eligible to import
+; BCA1: <PERMODULE_GLOBALVAR_INIT_REFS {{.*}} op1=16
; BCA1-NOT: <GLOBALVAL_SUMMARY_BLOCK
$g = comdat any
@@ -47,5 +50,6 @@ define i8* @f() {
; NODEBUG-NOT: !llvm.dbg.cu
!llvm.dbg.cu = !{}
+; M1: !{i32 1, !"ThinLTO", i32 0}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!llvm.module.flags = !{!1}
diff --git a/test/Transforms/Util/PredicateInfo/condprop2.ll b/test/Transforms/Util/PredicateInfo/condprop2.ll
index 415fa7c879e3..facd22f5b7a6 100644
--- a/test/Transforms/Util/PredicateInfo/condprop2.ll
+++ b/test/Transforms/Util/PredicateInfo/condprop2.ll
@@ -1,4 +1,4 @@
-; REQUIRES: asserts
+; REQUIRES: abi-breaking-checks
; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
; Hence, this test has been split from condprop.ll to test with -reverse-iterate.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
diff --git a/test/Transforms/Util/PredicateInfo/testandor2.ll b/test/Transforms/Util/PredicateInfo/testandor2.ll
index a03250c2f7a0..a1b9c62040c8 100644
--- a/test/Transforms/Util/PredicateInfo/testandor2.ll
+++ b/test/Transforms/Util/PredicateInfo/testandor2.ll
@@ -1,4 +1,4 @@
-; REQUIRES: asserts
+; REQUIRES: abi-breaking-checks
; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
; Hence, this test has been split from testandor.ll to test with -reverse-iterate.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
diff --git a/test/lit.cfg b/test/lit.cfg
index e9916b2a60e8..5e903c26657e 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -546,3 +546,6 @@ llvm_config_cmd.wait()
if config.have_libxar:
config.available_features.add('xar')
+
+if config.enable_abi_breaking_checks == "1":
+ config.available_features.add('abi-breaking-checks')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index b6a8b8b17bca..f95f6d8ec9ac 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -24,6 +24,7 @@ config.include_go_tests = @LLVM_INCLUDE_GO_TESTS@
config.go_executable = "@GO_EXECUTABLE@"
config.enable_shared = @ENABLE_SHARED@
config.enable_assertions = @ENABLE_ASSERTIONS@
+config.enable_abi_breaking_checks = "@LLVM_ENABLE_ABI_BREAKING_CHECKS@"
config.targets_to_build = "@TARGETS_TO_BUILD@"
config.native_target = "@LLVM_NATIVE_ARCH@"
config.llvm_bindings = "@LLVM_BINDINGS@".split(' ')
diff --git a/test/tools/llvm-cvtres/Inputs/test_resource.obj.coff b/test/tools/llvm-cvtres/Inputs/test_resource.obj.coff
new file mode 100644
index 000000000000..e5a7cbd01196
--- /dev/null
+++ b/test/tools/llvm-cvtres/Inputs/test_resource.obj.coff
Binary files differ
diff --git a/test/tools/llvm-cvtres/object.test b/test/tools/llvm-cvtres/object.test
new file mode 100644
index 000000000000..8117ecc910c9
--- /dev/null
+++ b/test/tools/llvm-cvtres/object.test
@@ -0,0 +1,229 @@
+// Check COFF emission of cvtres
+// The input was generated with the following command, using the original Windows
+// rc.exe:
+// > rc /fo test_resource.res /nologo test_resource.rc
+// The object file we are comparing against was generated with this command using
+// the original cvtres.
+// > cvtres /machine:X86 /readonly /nologo /out:test_resource.o test_resource.res
+
+RUN: llvm-cvtres /out:%t %p/Inputs/test_resource.res
+RUN: llvm-readobj -coff-resources -section-data %t | FileCheck %s
+
+CHECK: Resources [
+CHECK-NEXT: String Name Entries: 1
+CHECK-NEXT: ID Entries: 4
+CHECK-NEXT: Type: STRINGARRAY [
+CHECK-NEXT: String Name Entries: 1
+CHECK-NEXT: ID Entries: 0
+CHECK-NEXT: Name: MYRESOURCE [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Type: kRT_BITMAP (ID 2) [
+CHECK-NEXT: String Name Entries: 2
+CHECK-NEXT: ID Entries: 0
+CHECK-NEXT: Name: CURSOR [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Name: OKAY [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Type: kRT_MENU (ID 4) [
+CHECK-NEXT: String Name Entries: 1
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Name: "EAT" [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 3081) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Name: (ID 14432) [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 2052) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Type: kRT_DIALOG (ID 5) [
+CHECK-NEXT: String Name Entries: 1
+CHECK-NEXT: ID Entries: 0
+CHECK-NEXT: Name: TESTDIALOG [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Type: kRT_ACCELERATOR (ID 9) [
+CHECK-NEXT: String Name Entries: 1
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Name: MYACCELERATORS [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: Name: (ID 12) [
+CHECK-NEXT: String Name Entries: 0
+CHECK-NEXT: ID Entries: 1
+CHECK-NEXT: Language: (ID 1033) [
+CHECK-NEXT: Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT: Major Version: 0
+CHECK-NEXT: Minor Version: 0
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-NEXT: ]
+CHECK-DAG: .rsrc$02 Data (
+CHECK-NEXT: 0000: 11000300 E7030000 0D004400 4C040000 |..........D.L...|
+CHECK-NEXT: 0010: 82001200 BC010000 28000000 10000000 |........(.......|
+CHECK-NEXT: 0020: 10000000 01001800 00000000 00030000 |................|
+CHECK-NEXT: 0030: C40E0000 C40E0000 00000000 00000000 |................|
+CHECK-NEXT: 0040: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0050: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0060: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0070: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0080: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0090: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 00A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 00B0: FFFFFFFF FF7F7F7F 7C7C7C78 78787575 |........|||xxxuu|
+CHECK-NEXT: 00C0: 75FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |u...............|
+CHECK-NEXT: 00D0: FFFFFFFF FFFFFFFF FFFFFFFF 979797FF |................|
+CHECK-NEXT: 00E0: FFFFFFFF FF838383 AAAAAADB DBDB7979 |..............yy|
+CHECK-NEXT: 00F0: 79757575 FFFFFFFF FFFFFFFF FFFFFFFF |yuuu............|
+CHECK-NEXT: 0100: FFFFFFFF FFFFFFFF FFFFFFFF 9C9C9C98 |................|
+CHECK-NEXT: 0110: 9898FFFF FF888888 DBDBDBB7 B7B77D7D |..............}}|
+CHECK-NEXT: 0120: 7DFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |}...............|
+CHECK-NEXT: 0130: FFFFFFFF FFFFFFFF FFFFFFFF A0A0A09C |................|
+CHECK-NEXT: 0140: 9C9C9393 93ADADAD F2F2F284 84848181 |................|
+CHECK-NEXT: 0150: 81FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0160: FFFFFFFF FFFFFFFF FFFFFFFF A4A4A4D7 |................|
+CHECK-NEXT: 0170: D7D79D9D 9DD0D0D0 EEEEEE91 91918D8D |................|
+CHECK-NEXT: 0180: 8DFFFFFF FFFFFF81 81817E7E 7EFFFFFF |..........~~~...|
+CHECK-NEXT: 0190: FFFFFFFF FFFFFFFF FFFFFFFF A9A9A9F2 |................|
+CHECK-NEXT: 01A0: F2F2E5E5 E5E2E2E2 95959591 91918D8D |................|
+CHECK-NEXT: 01B0: 8D898989 868686FF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 01C0: FFFFFFFF FFFFFFFF FFFFFFFF ADADADF2 |................|
+CHECK-NEXT: 01D0: F2F2E1E1 E1DFDFDF E7E7E7E4 E4E4BBBB |................|
+CHECK-NEXT: 01E0: BB8E8E8E FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 01F0: FFFFFFFF FFFFFFFF FFFFFFFF B5B5B5F2 |................|
+CHECK-NEXT: 0200: F2F2E8E8 E8E7E7E7 EAEAEAC6 C6C69E9E |................|
+CHECK-NEXT: 0210: 9EFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0220: FFFFFFFF FFFFFFFF FFFFFFFF B9B9B9F4 |................|
+CHECK-NEXT: 0230: F4F4ECEC ECEDEDED CBCBCBA7 A7A7FFFF |................|
+CHECK-NEXT: 0240: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0250: FFFFFFFF FFFFFFFF FFFFFFFF BDBDBDF7 |................|
+CHECK-NEXT: 0260: F7F7EFEF EFD0D0D0 AFAFAFFF FFFFFFFF |................|
+CHECK-NEXT: 0270: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0280: FFFFFFFF FFFFFFFF FFFFFFFF C1C1C1F7 |................|
+CHECK-NEXT: 0290: F7F7D5D5 D5B6B6B6 FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 02A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 02B0: FFFFFFFF FFFFFFFF FFFFFFFF C4C4C4D9 |................|
+CHECK-NEXT: 02C0: D9D9BEBE BEFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 02D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 02E0: FFFFFFFF FFFFFFFF FFFFFFFF C8C8C8C5 |................|
+CHECK-NEXT: 02F0: C5C5FFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0300: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0310: FFFFFFFF FFFFFFFF FFFFFFFF CBCBCBFF |................|
+CHECK-NEXT: 0320: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0330: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0340: 28000000 10000000 10000000 01001800 |(...............|
+CHECK-NEXT: 0350: 00000000 00030000 C40E0000 C40E0000 |................|
+CHECK-NEXT: 0360: 00000000 00000000 FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0370: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0380: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0390: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03C0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 03F0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0400: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0410: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0420: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0430: FFFFFFFF A0E3A901 B31801B3 1801B318 |................|
+CHECK-NEXT: 0440: 01B31801 B31801B3 1861D06F FFFFFFFF |.........a.o....|
+CHECK-NEXT: 0450: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0460: FFFFFFFF 01B31800 D7331CDB 49DBF9E2 |.........3..I...|
+CHECK-NEXT: 0470: 9BEFAF00 D73300D7 3301B318 FFFFFFFF |.....3..3.......|
+CHECK-NEXT: 0480: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0490: FFFFFFFF 01B31800 DE55F6FE F9DBFAE7 |.........U......|
+CHECK-NEXT: 04A0: FEFFFE86 EFAE00DE 5501B318 FFFFFFFF |........U.......|
+CHECK-NEXT: 04B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 04C0: FFFFFFFF 01B31800 E676DBFB EC00E676 |.........v.....v|
+CHECK-NEXT: 04D0: 57EFA5FB FFFD55EE A401B318 FFFFFFFF |W.....U.........|
+CHECK-NEXT: 04E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 04F0: FFFFFFFF 01B31800 ED9800ED 9800ED98 |................|
+CHECK-NEXT: 0500: 00ED9887 F7CFFEFF FF01B318 FFFFFFFF |................|
+CHECK-NEXT: 0510: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0520: FFFFFFFF 01B31800 F4BA00F4 BA00F4BA |................|
+CHECK-NEXT: 0530: 00F4BA00 F4BA9CFB E401B318 FFFFFFFF |................|
+CHECK-NEXT: 0540: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0550: FFFFFFFF 01B31800 FBDB00FB DB00FBDB |................|
+CHECK-NEXT: 0560: 00FBDB00 FBDB00FB DB01B318 FFFFFFFF |................|
+CHECK-NEXT: 0570: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0580: FFFFFFFF 9FE2A801 B31801B3 1801B318 |................|
+CHECK-NEXT: 0590: 01B31801 B31801B3 1861D06F FFFFFFFF |.........a.o....|
+CHECK-NEXT: 05A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 05B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 05C0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 05D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 05E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 05F0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0600: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0610: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0620: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0630: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0640: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0650: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF |................|
+CHECK-NEXT: 0660: FFFFFFFF FFFFFFFF 00000000 00006400 |..............d.|
+CHECK-NEXT: 0670: 79007500 00000000 65007300 68006100 |y.u.....e.s.h.a.|
+CHECK-NEXT: 0680: 6C006100 00008000 66006B00 61006F00 |l.a.....f.k.a.o.|
+CHECK-NEXT: 0690: 79006100 00000000 0000C080 00000000 |y.a.............|
+CHECK-NEXT: 06A0: 02000A00 0A00C800 2C010000 00005400 |........,.....T.|
+CHECK-NEXT: 06B0: 65007300 74000000 01000250 00000000 |e.s.t......P....|
+CHECK-NEXT: 06C0: 0A000A00 E6000E00 0100FFFF 82004300 |..............C.|
+CHECK-NEXT: 06D0: 6F006E00 74006900 6E007500 65003A00 |o.n.t.i.n.u.e.:.|
+CHECK-NEXT: 06E0: 00000000 00000150 00000000 42008600 |.......P....B...|
+CHECK-NEXT: 06F0: A1000D00 0200FFFF 80002600 4F004B00 |..........&.O.K.|
+CHECK-NEXT: 0700: 00000000 00000000 11005800 A4000000 |..........X.....|
+CHECK-NEXT: 0710: 0D004800 2E160000 82001200 BC010000 |..H.............|
+CHECK-NEXT: 0720: 00000000 00006400 66006900 73006800 |......d.f.i.s.h.|
+CHECK-NEXT: 0730: 00000000 65007300 61006C00 61006400 |....e.s.a.l.a.d.|
+CHECK-NEXT: 0740: 00008000 66006400 75006300 6B000000 |....f.d.u.c.k...|
+CHECK-NEXT: 0750: 74686973 20697320 61207573 65722064 |this is a user d|
+CHECK-NEXT: 0760: 6566696E 65642072 65736F75 72636500 |efined resource.|
+CHECK-NEXT: 0770: 69742063 6F6E7461 696E7320 6D616E79 |it contains many|
+CHECK-NEXT: 0780: 20737472 696E6773 00000000 00000000 | strings........|
+CHECK-NEXT: )
diff --git a/test/tools/llvm-cvtres/resource.test b/test/tools/llvm-cvtres/parse.test
index b9be74bf671b..23911ada82e8 100644
--- a/test/tools/llvm-cvtres/resource.test
+++ b/test/tools/llvm-cvtres/parse.test
@@ -2,7 +2,7 @@
// rc.exe:
// > rc /fo test_resource.res /nologo test_resource.rc
-RUN: llvm-cvtres %p/Inputs/test_resource.res | FileCheck %s
+RUN: llvm-cvtres /verbose /out:%t %p/Inputs/test_resource.res | FileCheck %s
CHECK: Number of resources: 8
CHECK-NEXT: Resource Tree [
diff --git a/test/tools/llvm-dwarfdump/X86/brief.s b/test/tools/llvm-dwarfdump/X86/brief.s
new file mode 100644
index 000000000000..82c499de8c7f
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/brief.s
@@ -0,0 +1,131 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | llvm-dwarfdump -debug-dump=info -brief - \
+# RUN: | FileCheck %s
+
+# CHECK: DW_TAG_compile_unit
+# CHECK-NOT: DW_FORM
+# CHECK: DW_AT
+
+# This test is meant to verify that --brief hides DW_FORMs
+# and abbreviation codes from .debug_info section.
+
+
+ .section __TEXT,__text,regular,pure_instructions
+ .section __DWARF,__debug_str,regular,debug
+Linfo_string:
+ .asciz "basic.c" ## string offset=42
+ .section __DWARF,__debug_loc,regular,debug
+Lsection_debug_loc:
+ .section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+ .byte 1 ## Abbreviation Code
+ .byte 17 ## DW_TAG_compile_unit
+ .byte 0 ## DW_CHILDREN_no
+ .byte 37 ## DW_AT_producer
+ .byte 14 ## DW_FORM_strp
+ .byte 19 ## DW_AT_language
+ .byte 5 ## DW_FORM_data2
+ .byte 3 ## DW_AT_name
+ .byte 14 ## DW_FORM_strp
+ .byte 16 ## DW_AT_stmt_list
+ .byte 23 ## DW_FORM_sec_offset
+ .byte 27 ## DW_AT_comp_dir
+ .byte 14 ## DW_FORM_strp
+ .byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+Lcu_begin0:
+ .long 26 ## Length of Unit
+ .short 4 ## DWARF version number
+Lset0 = Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+ .long Lset0
+ .byte 8 ## Address Size (in bytes)
+ .byte 1 ## Abbrev [1] 0xb:0x13 DW_TAG_compile_unit
+ .long 0 ## DW_AT_producer
+ .short 12 ## DW_AT_language
+ .long 42 ## DW_AT_name
+Lset1 = Lline_table_start0-Lsection_line ## DW_AT_stmt_list
+ .long Lset1
+ .long 50 ## DW_AT_comp_dir
+ .section __DWARF,__debug_ranges,regular,debug
+Ldebug_range:
+ .section __DWARF,__debug_macinfo,regular,debug
+Ldebug_macinfo:
+Lcu_macro_begin0:
+ .byte 0 ## End Of Macro List Mark
+ .section __DWARF,__apple_names,regular,debug
+Lnames_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 1 ## Header Bucket Count
+ .long 0 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 6 ## DW_FORM_data4
+ .long -1 ## Bucket 0
+ .section __DWARF,__apple_objc,regular,debug
+Lobjc_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 1 ## Header Bucket Count
+ .long 0 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 6 ## DW_FORM_data4
+ .long -1 ## Bucket 0
+ .section __DWARF,__apple_namespac,regular,debug
+Lnamespac_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 1 ## Header Bucket Count
+ .long 0 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 6 ## DW_FORM_data4
+ .long -1 ## Bucket 0
+ .section __DWARF,__apple_types,regular,debug
+Ltypes_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 1 ## Header Bucket Count
+ .long 0 ## Header Hash Count
+ .long 20 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 3 ## HeaderData Atom Count
+ .short 1 ## DW_ATOM_die_offset
+ .short 6 ## DW_FORM_data4
+ .short 3 ## DW_ATOM_die_tag
+ .short 5 ## DW_FORM_data2
+ .short 4 ## DW_ATOM_type_flags
+ .short 11 ## DW_FORM_data1
+ .long -1 ## Bucket 0
+ .section __DWARF,__apple_exttypes,regular,debug
+Lexttypes_begin:
+ .long 1212240712 ## Header Magic
+ .short 1 ## Header Version
+ .short 0 ## Header Hash Function
+ .long 1 ## Header Bucket Count
+ .long 0 ## Header Hash Count
+ .long 12 ## Header Data Length
+ .long 0 ## HeaderData Die Offset Base
+ .long 1 ## HeaderData Atom Count
+ .short 7 ## DW_ATOM_ext_types
+ .short 6 ## DW_FORM_data4
+ .long -1 ## Bucket 0
+
+.subsections_via_symbols
+ .section __DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/lit.local.cfg b/test/tools/llvm-dwarfdump/X86/lit.local.cfg
new file mode 100644
index 000000000000..c8625f4d9d24
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
diff --git a/test/tools/llvm-pdbdump/class-layout.test b/test/tools/llvm-pdbdump/class-layout.test
index 4a69c17db2ba..7a08194c5005 100644
--- a/test/tools/llvm-pdbdump/class-layout.test
+++ b/test/tools/llvm-pdbdump/class-layout.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -all -class-recurse-depth=1 \
+; RUN: llvm-pdbutil pretty -all -class-recurse-depth=1 \
; RUN: %p/Inputs/ClassLayoutTest.pdb > %t
; RUN: FileCheck -input-file=%t %s -check-prefix=GLOBALS_TEST
; RUN: FileCheck -input-file=%t %s -check-prefix=MEMBERS_TEST
diff --git a/test/tools/llvm-pdbdump/complex-padding-graphical.test b/test/tools/llvm-pdbdump/complex-padding-graphical.test
index a23321fe0124..9373c1ec6c2f 100644
--- a/test/tools/llvm-pdbdump/complex-padding-graphical.test
+++ b/test/tools/llvm-pdbdump/complex-padding-graphical.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -classes -class-definitions=layout \
+; RUN: llvm-pdbutil pretty -classes -class-definitions=layout \
; RUN: -include-types=Test %p/Inputs/ComplexPaddingTest.pdb > %t
; RUN: FileCheck -input-file=%t %s -check-prefix=DIRECT_VB_ONLY
diff --git a/test/tools/llvm-pdbdump/enum-layout.test b/test/tools/llvm-pdbdump/enum-layout.test
index df447c65bbae..5813321f000d 100644
--- a/test/tools/llvm-pdbdump/enum-layout.test
+++ b/test/tools/llvm-pdbdump/enum-layout.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -types %p/Inputs/ClassLayoutTest.pdb > %t
+; RUN: llvm-pdbutil pretty -types %p/Inputs/ClassLayoutTest.pdb > %t
; RUN: FileCheck -input-file=%t %s -check-prefix=GLOBAL_ENUM
; RUN: FileCheck -input-file=%t %s -check-prefix=MEMBER_ENUM
diff --git a/test/tools/llvm-pdbdump/load-address.test b/test/tools/llvm-pdbdump/load-address.test
index 5791637d3a74..4402790d71f4 100644
--- a/test/tools/llvm-pdbdump/load-address.test
+++ b/test/tools/llvm-pdbdump/load-address.test
@@ -1,6 +1,6 @@
-; RUN: llvm-pdbdump pretty -externals %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil pretty -externals %p/Inputs/LoadAddressTest.pdb \
; RUN: | FileCheck --check-prefix=RVA %s
-; RUN: llvm-pdbdump pretty -externals -load-address=0x40000000 \
+; RUN: llvm-pdbutil pretty -externals -load-address=0x40000000 \
; RUN: %p/Inputs/LoadAddressTest.pdb | FileCheck --check-prefix=VA %s
; RVA: ---EXTERNALS---
diff --git a/test/tools/llvm-pdbdump/raw-stream-data.test b/test/tools/llvm-pdbdump/raw-stream-data.test
index d55980632d41..1d7c23fe3672 100644
--- a/test/tools/llvm-pdbdump/raw-stream-data.test
+++ b/test/tools/llvm-pdbdump/raw-stream-data.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump raw -stream-data=8 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8 %p/Inputs/LoadAddressTest.pdb \
; RUN: | FileCheck %s -check-prefix=FULL_STREAM
-; RUN: llvm-pdbdump raw -stream-data=8:4 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8:4 %p/Inputs/LoadAddressTest.pdb \
; RUN: | FileCheck %s -check-prefix=OFFSET_STREAM
-; RUN: llvm-pdbdump raw -stream-data=8:4@24 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8:4@24 %p/Inputs/LoadAddressTest.pdb \
; RUN: | FileCheck %s -check-prefix=OFFSET_AND_LENGTH
FULL_STREAM: Stream Data {
diff --git a/test/tools/llvm-pdbdump/regex-filter.test b/test/tools/llvm-pdbdump/regex-filter.test
index 36c3da33e2e4..fb8ca7fa1892 100644
--- a/test/tools/llvm-pdbdump/regex-filter.test
+++ b/test/tools/llvm-pdbdump/regex-filter.test
@@ -1,25 +1,25 @@
-; RUN: llvm-pdbdump pretty -module-syms -globals -types %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -module-syms -globals -types %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=NO_FILTER %s
-; RUN: llvm-pdbdump pretty -types -exclude-types="GlobalTypedef|NestedTypedef" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="GlobalTypedef|NestedTypedef" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_TYPEDEFS %s
-; RUN: llvm-pdbdump pretty -classes -enums %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -classes -enums %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=EXCLUDE_TYPEDEFS %s
-; RUN: llvm-pdbdump pretty -types -exclude-types="GlobalEnum|NestedEnum" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="GlobalEnum|NestedEnum" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_ENUMS %s
-; RUN: llvm-pdbdump pretty -classes -typedefs %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -classes -typedefs %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=EXCLUDE_ENUMS %s
-; RUN: llvm-pdbdump pretty -types -module-syms -globals -exclude-symbols="MemberVar|GlobalVar" \
+; RUN: llvm-pdbutil pretty -types -module-syms -globals -exclude-symbols="MemberVar|GlobalVar" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_VARS %s
-; RUN: llvm-pdbdump pretty -types -exclude-types="FilterTestClass" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="FilterTestClass" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_WHOLE_CLASS %s
-; RUN: llvm-pdbdump pretty -module-syms -globals -exclude-compilands="FilterTest.obj" \
+; RUN: llvm-pdbutil pretty -module-syms -globals -exclude-compilands="FilterTest.obj" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_COMPILAND %s
-; RUN: llvm-pdbdump pretty -types -include-types="FilterTestClass" \
+; RUN: llvm-pdbutil pretty -types -include-types="FilterTestClass" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_TYPES %s
-; RUN: llvm-pdbdump pretty -types -module-syms -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \
+; RUN: llvm-pdbutil pretty -types -module-syms -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_VARS %s
; NO_FILTER: ---TYPES---
diff --git a/test/tools/llvm-pdbdump/simple-padding-graphical.test b/test/tools/llvm-pdbdump/simple-padding-graphical.test
index 0e19f9cc7018..91da534ca010 100644
--- a/test/tools/llvm-pdbdump/simple-padding-graphical.test
+++ b/test/tools/llvm-pdbdump/simple-padding-graphical.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -classes -class-definitions=layout \
+; RUN: llvm-pdbutil pretty -classes -class-definitions=layout \
; RUN: -include-types=SimplePad %p/Inputs/SimplePaddingTest.pdb > %t
; RUN: FileCheck -input-file=%t %s -check-prefix=NO_PADDING
diff --git a/test/tools/llvm-pdbdump/symbol-filters.test b/test/tools/llvm-pdbdump/symbol-filters.test
index d12d2aa8be0f..80c24baf17ca 100644
--- a/test/tools/llvm-pdbdump/symbol-filters.test
+++ b/test/tools/llvm-pdbdump/symbol-filters.test
@@ -1,25 +1,25 @@
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=ONLY_DATA %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=thunks %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=thunks %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=ONLY_THUNKS %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs %p/Inputs/FilterTest.pdb \
; RUN: | FileCheck --check-prefix=ONLY_FUNCS %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs -sym-types=data \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=TWO_TYPES %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data \
; RUN: -symbol-order=name %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=NAME_SORT_DATA %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data \
; RUN: -symbol-order=size %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=SIZE_SORT_DATA %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs \
; RUN: -symbol-order=name %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=NAME_SORT_FUNCS %s
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs \
; RUN: -symbol-order=size %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=SIZE_SORT_FUNCS %s
; ONLY_DATA-NOT: func
diff --git a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
deleted file mode 100755
index 9566ed5c0f14..000000000000
--- a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
+++ /dev/null
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803
new file mode 100644
index 000000000000..421269cbd8b2
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803
Binary files differ
diff --git a/test/tools/llvm-readobj/amdgpu-elf-definitions.test b/test/tools/llvm-readobj/amdgpu-elf-definitions.test
new file mode 100644
index 000000000000..c30931242df6
--- /dev/null
+++ b/test/tools/llvm-readobj/amdgpu-elf-definitions.test
@@ -0,0 +1,11 @@
+RUN: llvm-readobj -file-headers -program-headers -sections -symbols %p/Inputs/trivial.obj.elf-amdhsa-gfx803 | FileCheck %s
+
+CHECK: Format: ELF64-amdgpu-hsacobj
+CHECK: Arch: amdgcn
+CHECK: ElfHeader {
+CHECK: Ident {
+CHECK: OS/ABI: AMDGPU_HSA (0x40)
+CHECK: ABIVersion: 0
+CHECK: }
+CHECK: Machine: EM_AMDGPU (0xE0)
+CHECK: }
diff --git a/test/tools/llvm-readobj/amdgpu-elf-defs.test b/test/tools/llvm-readobj/amdgpu-elf-defs.test
deleted file mode 100644
index 9a576e8158f9..000000000000
--- a/test/tools/llvm-readobj/amdgpu-elf-defs.test
+++ /dev/null
@@ -1,28 +0,0 @@
-RUN: llvm-readobj -program-headers -sections -symbols -file-headers \
-RUN: %p/Inputs/trivial.elf-amdhsa-kaveri | FileCheck %s
-
-CHECK: ElfHeader {
-CHECK: Ident {
-CHECK: Class: 64-bit (0x2)
-CHECK: DataEncoding: LittleEndian (0x1)
-CHECK: Machine: EM_AMDGPU (0xE0)
-
-
-CHECK: Section {
-CHECK: Name: .text
-CHECK: Type: SHT_PROGBITS (0x1)
-CHECK: Flags [ (0xC00007
-CHECK: SHF_ALLOC (0x2)
-CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
-CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
-CHECK: SHF_EXECINSTR (0x4)
-CHECK: SHF_WRITE (0x1)
-
-CHECK: Symbol {
-CHECK: Name: hello_world
-CHECK: Value: 0x0
-CHECK: Binding: Local (0x0)
-CHECK: Type: AMDGPU_HSA_KERNEL (0xA)
-
-CHECK: ProgramHeader {
-CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT (0x60000003)
diff --git a/test/tools/llvm-readobj/elf-sec-flags.test b/test/tools/llvm-readobj/elf-sec-flags.test
index 842ded3e9e59..8a977109238d 100644
--- a/test/tools/llvm-readobj/elf-sec-flags.test
+++ b/test/tools/llvm-readobj/elf-sec-flags.test
@@ -1,29 +1,6 @@
# Check that llvm-readobj shows arch specific ELF section flags.
-# RUN: yaml2obj -docnum 1 %s > %t-amdgpu.o
-# RUN: llvm-readobj -s %t-amdgpu.o | FileCheck -check-prefix=AMD %s
-
-# AMD: Flags [ (0x300000)
-# AMD-NEXT: SHF_AMDGPU_HSA_GLOBAL (0x100000)
-# AMD-NEXT: SHF_AMDGPU_HSA_READONLY (0x200000)
-# AMD-NEXT: ]
-
-# amdgpu.o
---- !ELF
-FileHeader:
- Class: ELFCLASS64
- Data: ELFDATA2LSB
- OSABI: ELFOSABI_GNU
- Type: ET_REL
- Machine: EM_AMDGPU
- Flags: []
-Sections:
- - Name: .amdgpu
- Type: SHT_PROGBITS
- Flags: [SHF_AMDGPU_HSA_GLOBAL, SHF_AMDGPU_HSA_READONLY]
- Size: 4
-
-# RUN: yaml2obj -docnum 2 %s > %t-hex.o
+# RUN: yaml2obj -docnum 1 %s > %t-hex.o
# RUN: llvm-readobj -s %t-hex.o | FileCheck -check-prefix=HEX %s
# HEX: Flags [ (0x10000000)
@@ -44,7 +21,7 @@ Sections:
Flags: [SHF_HEX_GPREL]
Size: 4
-# RUN: yaml2obj -docnum 3 %s > %t-mips.o
+# RUN: yaml2obj -docnum 2 %s > %t-mips.o
# RUN: llvm-readobj -s %t-mips.o | FileCheck -check-prefix=MIPS %s
# MIPS: Flags [ (0x38000000)
@@ -67,7 +44,7 @@ Sections:
Flags: [SHF_MIPS_GPREL, SHF_MIPS_MERGE, SHF_MIPS_NOSTRIP]
Size: 4
-# RUN: yaml2obj -docnum 4 %s > %t-x86_64.o
+# RUN: yaml2obj -docnum 3 %s > %t-x86_64.o
# RUN: llvm-readobj -s %t-x86_64.o | FileCheck -check-prefix=X86_64 %s
# X86_64: Flags [ (0x10000000)